PTE markers were previously only concerned with UFFD-specific logic - that is, PTE entries with the UFFD WP marker set or those marked via UFFDIO_POISON. However since the introduction of guard markers in commit 7c53dfbdb024 ("mm: add PTE_MARKER_GUARD PTE marker"), this has no longer been the case. Issues have been avoided as guard regions are not permitted in conjunction with UFFD, but it still leaves very confusing logic in place, most notably the misleading and poorly named pte_none_mostly() and huge_pte_none_mostly(). This predicate returns true for PTE entries that ought to be treated as none, but only in certain circumstances, and on the assumption we are dealing with H/W poison markers or UFFD WP markers. This patch removes these functions and makes each invocation of these functions instead explicitly check what it needs to check. As part of this effort it introduces is_uffd_pte_marker() to explicitly determine if a marker in fact is used as part of UFFD or not. In the HMM logic we note that the only time we would need to check for a fault is in the case of a UFFD WP marker, otherwise we simply encounter a fault error (VM_FAULT_HWPOISON for H/W poisoned marker, VM_FAULT_SIGSEGV for a guard marker), so only check for the UFFD WP case. While we're here we also refactor code to make it easier to understand. Reviewed-by: Vlastimil Babka Signed-off-by: Lorenzo Stoakes --- fs/userfaultfd.c | 83 +++++++++++++++++++---------------- include/asm-generic/hugetlb.h | 8 ---- include/linux/swapops.h | 18 -------- include/linux/userfaultfd_k.h | 21 +++++++++ mm/hmm.c | 2 +- mm/hugetlb.c | 47 ++++++++++---------- mm/mincore.c | 17 +++++-- mm/userfaultfd.c | 27 +++++++----- 8 files changed, 123 insertions(+), 100 deletions(-) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 54c6cc7fe9c6..04c66b5001d5 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -233,40 +233,46 @@ static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx, { struct vm_area_struct *vma = vmf->vma; pte_t *ptep, pte; - bool ret = true; assert_fault_locked(vmf); ptep = hugetlb_walk(vma, vmf->address, vma_mmu_pagesize(vma)); if (!ptep) - goto out; + return true; - ret = false; pte = huge_ptep_get(vma->vm_mm, vmf->address, ptep); /* * Lockless access: we're in a wait_event so it's ok if it - * changes under us. PTE markers should be handled the same as none - * ptes here. + * changes under us. */ - if (huge_pte_none_mostly(pte)) - ret = true; + + /* If missing entry, wait for handler. */ + if (huge_pte_none(pte)) + return true; + /* UFFD PTE markers require handling. */ + if (is_uffd_pte_marker(pte)) + return true; + /* If VMA has UFFD WP faults enabled and WP fault, wait for handler. */ if (!huge_pte_write(pte) && (reason & VM_UFFD_WP)) - ret = true; -out: - return ret; + return true; + + /* Otherwise, if entry isn't present, let fault handler deal with it. */ + return false; } #else static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx, struct vm_fault *vmf, unsigned long reason) { - return false; /* should never get here */ + /* Should never get here. 
*/ + VM_WARN_ON_ONCE(1); + return false; } #endif /* CONFIG_HUGETLB_PAGE */ /* - * Verify the pagetables are still not ok after having reigstered into + * Verify the pagetables are still not ok after having registered into * the fault_pending_wqh to avoid userland having to UFFDIO_WAKE any * userfault that has already been resolved, if userfaultfd_read_iter and * UFFDIO_COPY|ZEROPAGE are being run simultaneously on two different @@ -284,53 +290,55 @@ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx, pmd_t *pmd, _pmd; pte_t *pte; pte_t ptent; - bool ret = true; + bool ret; assert_fault_locked(vmf); pgd = pgd_offset(mm, address); if (!pgd_present(*pgd)) - goto out; + return true; p4d = p4d_offset(pgd, address); if (!p4d_present(*p4d)) - goto out; + return true; pud = pud_offset(p4d, address); if (!pud_present(*pud)) - goto out; + return true; pmd = pmd_offset(pud, address); again: _pmd = pmdp_get_lockless(pmd); if (pmd_none(_pmd)) - goto out; + return true; - ret = false; if (!pmd_present(_pmd)) - goto out; + return false; - if (pmd_trans_huge(_pmd)) { - if (!pmd_write(_pmd) && (reason & VM_UFFD_WP)) - ret = true; - goto out; - } + if (pmd_trans_huge(_pmd)) + return !pmd_write(_pmd) && (reason & VM_UFFD_WP); pte = pte_offset_map(pmd, address); - if (!pte) { - ret = true; + if (!pte) goto again; - } + /* * Lockless access: we're in a wait_event so it's ok if it - * changes under us. PTE markers should be handled the same as none - * ptes here. + * changes under us. */ ptent = ptep_get(pte); - if (pte_none_mostly(ptent)) - ret = true; + + ret = true; + /* If missing entry, wait for handler. */ + if (pte_none(ptent)) + goto out; + /* UFFD PTE markers require handling. */ + if (is_uffd_pte_marker(ptent)) + goto out; + /* If VMA has UFFD WP faults enabled and WP fault, wait for handler. 
*/ if (!pte_write(ptent) && (reason & VM_UFFD_WP)) - ret = true; - pte_unmap(pte); + goto out; + ret = false; out: + pte_unmap(pte); return ret; } @@ -490,12 +498,13 @@ vm_fault_t handle_userfault(struct vm_fault *vmf, unsigned long reason) set_current_state(blocking_state); spin_unlock_irq(&ctx->fault_pending_wqh.lock); - if (!is_vm_hugetlb_page(vma)) - must_wait = userfaultfd_must_wait(ctx, vmf, reason); - else + if (is_vm_hugetlb_page(vma)) { must_wait = userfaultfd_huge_must_wait(ctx, vmf, reason); - if (is_vm_hugetlb_page(vma)) hugetlb_vma_unlock_read(vma); + } else { + must_wait = userfaultfd_must_wait(ctx, vmf, reason); + } + release_fault_lock(vmf); if (likely(must_wait && !READ_ONCE(ctx->released))) { diff --git a/include/asm-generic/hugetlb.h b/include/asm-generic/hugetlb.h index dcb8727f2b82..e1a2e1b7c8e7 100644 --- a/include/asm-generic/hugetlb.h +++ b/include/asm-generic/hugetlb.h @@ -97,14 +97,6 @@ static inline int huge_pte_none(pte_t pte) } #endif -/* Please refer to comments above pte_none_mostly() for the usage */ -#ifndef __HAVE_ARCH_HUGE_PTE_NONE_MOSTLY -static inline int huge_pte_none_mostly(pte_t pte) -{ - return huge_pte_none(pte) || is_pte_marker(pte); -} -#endif - #ifndef __HAVE_ARCH_HUGE_PTEP_SET_WRPROTECT static inline void huge_ptep_set_wrprotect(struct mm_struct *mm, unsigned long addr, pte_t *ptep) diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 2687928a8146..d1f665935cfc 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -469,24 +469,6 @@ static inline int is_guard_swp_entry(swp_entry_t entry) (pte_marker_get(entry) & PTE_MARKER_GUARD); } -/* - * This is a special version to check pte_none() just to cover the case when - * the pte is a pte marker. It existed because in many cases the pte marker - * should be seen as a none pte; it's just that we have stored some information - * onto the none pte so it becomes not-none any more. - * - * It should be used when the pte is file-backed, ram-based and backing - * userspace pages, like shmem. It is not needed upon pgtables that do not - * support pte markers at all. For example, it's not needed on anonymous - * memory, kernel-only memory (including when the system is during-boot), - * non-ram based generic file-system. It's fine to be used even there, but the - * extra pte marker check will be pure overhead. - */ -static inline int pte_none_mostly(pte_t pte) -{ - return pte_none(pte) || is_pte_marker(pte); -} - static inline struct page *pfn_swap_entry_to_page(swp_entry_t entry) { struct page *p = pfn_to_page(swp_offset_pfn(entry)); diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index c0e716aec26a..da0b4fcc566f 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -479,4 +479,25 @@ static inline bool pte_swp_uffd_wp_any(pte_t pte) return false; } + +static inline bool is_uffd_pte_marker(pte_t pte) +{ + swp_entry_t entry; + + if (pte_present(pte)) + return false; + + entry = pte_to_swp_entry(pte); + if (!is_pte_marker_entry(entry)) + return false; + + /* UFFD WP, poisoned swap entries are UFFD handled. 
*/ + if (pte_marker_entry_uffd_wp(entry)) + return true; + if (is_poisoned_swp_entry(entry)) + return true; + + return false; +} + #endif /* _LINUX_USERFAULTFD_K_H */ diff --git a/mm/hmm.c b/mm/hmm.c index a56081d67ad6..43d4a91035ff 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -244,7 +244,7 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr, uint64_t pfn_req_flags = *hmm_pfn; uint64_t new_pfn_flags = 0; - if (pte_none_mostly(pte)) { + if (pte_none(pte) || pte_marker_uffd_wp(pte)) { required_fault = hmm_pte_need_fault(hmm_vma_walk, pfn_req_flags, 0); if (required_fault) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 1ea459723cce..01c784547d1e 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -6743,29 +6743,28 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, } vmf.orig_pte = huge_ptep_get(mm, vmf.address, vmf.pte); - if (huge_pte_none_mostly(vmf.orig_pte)) { - if (is_pte_marker(vmf.orig_pte)) { - pte_marker marker = - pte_marker_get(pte_to_swp_entry(vmf.orig_pte)); - - if (marker & PTE_MARKER_POISONED) { - ret = VM_FAULT_HWPOISON_LARGE | - VM_FAULT_SET_HINDEX(hstate_index(h)); - goto out_mutex; - } else if (WARN_ON_ONCE(marker & PTE_MARKER_GUARD)) { - /* This isn't supported in hugetlb. */ - ret = VM_FAULT_SIGSEGV; - goto out_mutex; - } - } - + if (huge_pte_none(vmf.orig_pte)) /* - * Other PTE markers should be handled the same way as none PTE. - * * hugetlb_no_page will drop vma lock and hugetlb fault * mutex internally, which make us return immediately. */ return hugetlb_no_page(mapping, &vmf); + + if (is_pte_marker(vmf.orig_pte)) { + const pte_marker marker = + pte_marker_get(pte_to_swp_entry(vmf.orig_pte)); + + if (marker & PTE_MARKER_POISONED) { + ret = VM_FAULT_HWPOISON_LARGE | + VM_FAULT_SET_HINDEX(hstate_index(h)); + goto out_mutex; + } else if (WARN_ON_ONCE(marker & PTE_MARKER_GUARD)) { + /* This isn't supported in hugetlb. */ + ret = VM_FAULT_SIGSEGV; + goto out_mutex; + } + + return hugetlb_no_page(mapping, &vmf); } ret = 0; @@ -6934,6 +6933,7 @@ int hugetlb_mfill_atomic_pte(pte_t *dst_pte, int ret = -ENOMEM; struct folio *folio; bool folio_in_pagecache = false; + pte_t dst_ptep; if (uffd_flags_mode_is(flags, MFILL_ATOMIC_POISON)) { ptl = huge_pte_lock(h, dst_mm, dst_pte); @@ -7073,13 +7073,14 @@ int hugetlb_mfill_atomic_pte(pte_t *dst_pte, if (folio_test_hwpoison(folio)) goto out_release_unlock; + ret = -EEXIST; + + dst_ptep = huge_ptep_get(dst_mm, dst_addr, dst_pte); /* - * We allow to overwrite a pte marker: consider when both MISSING|WP - * registered, we firstly wr-protect a none pte which has no page cache - * page backing it, then access the page. + * See comment about UFFD marker overwriting in + * mfill_atomic_install_pte(). */ - ret = -EEXIST; - if (!huge_pte_none_mostly(huge_ptep_get(dst_mm, dst_addr, dst_pte))) + if (!huge_pte_none(dst_ptep) && !is_uffd_pte_marker(dst_ptep)) goto out_release_unlock; if (folio_in_pagecache) diff --git a/mm/mincore.c b/mm/mincore.c index 8ec4719370e1..151b2dbb783b 100644 --- a/mm/mincore.c +++ b/mm/mincore.c @@ -32,11 +32,22 @@ static int mincore_hugetlb(pte_t *pte, unsigned long hmask, unsigned long addr, spinlock_t *ptl; ptl = huge_pte_lock(hstate_vma(walk->vma), walk->mm, pte); + /* * Hugepages under user process are always in RAM and never * swapped out, but theoretically it needs to be checked. 
*/ - present = pte && !huge_pte_none_mostly(huge_ptep_get(walk->mm, addr, pte)); + if (!pte) { + present = 0; + } else { + const pte_t ptep = huge_ptep_get(walk->mm, addr, pte); + + if (huge_pte_none(ptep) || is_pte_marker(ptep)) + present = 0; + else + present = 1; + } + for (; addr != end; vec++, addr += PAGE_SIZE) *vec = present; walk->private = vec; @@ -175,8 +186,8 @@ static int mincore_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, pte_t pte = ptep_get(ptep); step = 1; - /* We need to do cache lookup too for pte markers */ - if (pte_none_mostly(pte)) + /* We need to do cache lookup too for UFFD pte markers */ + if (pte_none(pte) || is_uffd_pte_marker(pte)) __mincore_unmapped_range(addr, addr + PAGE_SIZE, vma, vec); else if (pte_present(pte)) { diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 00122f42718c..cc4ce205bbec 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -178,6 +178,7 @@ int mfill_atomic_install_pte(pmd_t *dst_pmd, spinlock_t *ptl; struct folio *folio = page_folio(page); bool page_in_cache = folio_mapping(folio); + pte_t dst_ptep; _dst_pte = mk_pte(page, dst_vma->vm_page_prot); _dst_pte = pte_mkdirty(_dst_pte); @@ -199,12 +200,15 @@ int mfill_atomic_install_pte(pmd_t *dst_pmd, } ret = -EEXIST; + + dst_ptep = ptep_get(dst_pte); + /* - * We allow to overwrite a pte marker: consider when both MISSING|WP - * registered, we firstly wr-protect a none pte which has no page cache - * page backing it, then access the page. + * We are allowed to overwrite a UFFD pte marker: consider when both + * MISSING|WP registered, we firstly wr-protect a none pte which has no + * page cache page backing it, then access the page. */ - if (!pte_none_mostly(ptep_get(dst_pte))) + if (!pte_none(dst_ptep) && !is_uffd_pte_marker(dst_ptep)) goto out_unlock; if (page_in_cache) { @@ -583,12 +587,15 @@ static __always_inline ssize_t mfill_atomic_hugetlb( goto out_unlock; } - if (!uffd_flags_mode_is(flags, MFILL_ATOMIC_CONTINUE) && - !huge_pte_none_mostly(huge_ptep_get(dst_mm, dst_addr, dst_pte))) { - err = -EEXIST; - hugetlb_vma_unlock_read(dst_vma); - mutex_unlock(&hugetlb_fault_mutex_table[hash]); - goto out_unlock; + if (!uffd_flags_mode_is(flags, MFILL_ATOMIC_CONTINUE)) { + const pte_t ptep = huge_ptep_get(dst_mm, dst_addr, dst_pte); + + if (!huge_pte_none(ptep) && !is_uffd_pte_marker(ptep)) { + err = -EEXIST; + hugetlb_vma_unlock_read(dst_vma); + mutex_unlock(&hugetlb_fault_mutex_table[hash]); + goto out_unlock; + } } err = hugetlb_mfill_atomic_pte(dst_pte, dst_vma, dst_addr, -- 2.51.0 The kernel maintains leaf page table entries which contain either: - Nothing ('none' entries) - Present entries (that is stuff the hardware can navigate without fault) - Everything else that will cause a fault which the kernel handles In the 'everything else' group we include swap entries, but we also include a number of other things such as migration entries, device private entries and marker entries. Unfortunately this 'everything else' group expresses everything through a swp_entry_t type, and these entries are referred to swap entries even though they may well not contain a... swap entry. This is compounded by the rather mind-boggling concept of a non-swap swap entry (checked via non_swap_entry()) and the means by which we twist and turn to satisfy this. This patch lays the foundation for reducing this confusion. We refer to 'everything else' as a 'software-define leaf entry' or 'softleaf'. 
for short And in fact we scoop up the 'none' entries into this concept also so we are left with: - Present entries. - Softleaf entries (which may be empty). This allows for radical simplification across the board - one can simply convert any leaf page table entry to a leaf entry via softleaf_from_pte(). If the entry is present, we return an empty leaf entry, so it is assumed the caller is aware that they must differentiate between the two categories of page table entries, checking for the former via pte_present(). As a result, we can eliminate a number of places where we would otherwise need to use predicates to see if we can proceed with leaf page table entry conversion and instead just go ahead and do it unconditionally. We do so where we can, adjusting surrounding logic as necessary to integrate the new softleaf_t logic as far as seems reasonable at this stage. We typedef swp_entry_t to softleaf_t for the time being until the conversion can be complete, meaning everything remains compatible regardless of which type is used. We will eventually remove swp_entry_t when the conversion is complete. We introduce a new header file to keep things clear - leafops.h - this imports swapops.h so can direct replace swapops imports without issue, and we do so in all the files that require it. Additionally, add new leafops.h file to core mm maintainers entry. Signed-off-by: Lorenzo Stoakes --- MAINTAINERS | 1 + fs/proc/task_mmu.c | 26 +-- fs/userfaultfd.c | 6 +- include/linux/leafops.h | 382 ++++++++++++++++++++++++++++++++++ include/linux/mm_inline.h | 6 +- include/linux/mm_types.h | 25 +++ include/linux/swapops.h | 28 --- include/linux/userfaultfd_k.h | 51 +---- mm/hmm.c | 2 +- mm/hugetlb.c | 37 ++-- mm/madvise.c | 16 +- mm/memory.c | 41 ++-- mm/mincore.c | 6 +- mm/mprotect.c | 6 +- mm/mremap.c | 4 +- mm/page_vma_mapped.c | 11 +- mm/shmem.c | 7 +- mm/userfaultfd.c | 6 +- 18 files changed, 497 insertions(+), 164 deletions(-) create mode 100644 include/linux/leafops.h diff --git a/MAINTAINERS b/MAINTAINERS index 2628431dcdfe..314910a70bbf 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -16257,6 +16257,7 @@ T: git git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm F: include/linux/gfp.h F: include/linux/gfp_types.h F: include/linux/highmem.h +F: include/linux/leafops.h F: include/linux/memory.h F: include/linux/mm.h F: include/linux/mm_*.h diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index fc35a0543f01..24d26b49d870 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include @@ -1230,11 +1230,11 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask, if (pte_present(ptent)) { folio = page_folio(pte_page(ptent)); present = true; - } else if (is_swap_pte(ptent)) { - swp_entry_t swpent = pte_to_swp_entry(ptent); + } else { + const softleaf_t entry = softleaf_from_pte(ptent); - if (is_pfn_swap_entry(swpent)) - folio = pfn_swap_entry_folio(swpent); + if (softleaf_has_pfn(entry)) + folio = softleaf_to_folio(entry); } if (folio) { @@ -1955,9 +1955,9 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm, flags |= PM_SWAP; if (is_pfn_swap_entry(entry)) page = pfn_swap_entry_to_page(entry); - if (pte_marker_entry_uffd_wp(entry)) + if (softleaf_is_uffd_wp_marker(entry)) flags |= PM_UFFD_WP; - if (is_guard_swp_entry(entry)) + if (softleaf_is_guard_marker(entry)) flags |= PM_GUARD_REGION; } @@ -2330,18 +2330,18 @@ static unsigned long pagemap_page_category(struct pagemap_scan_private *p, if 
(pte_soft_dirty(pte)) categories |= PAGE_IS_SOFT_DIRTY; } else if (is_swap_pte(pte)) { - swp_entry_t swp; + softleaf_t entry; categories |= PAGE_IS_SWAPPED; if (!pte_swp_uffd_wp_any(pte)) categories |= PAGE_IS_WRITTEN; - swp = pte_to_swp_entry(pte); - if (is_guard_swp_entry(swp)) + entry = softleaf_from_pte(pte); + if (softleaf_is_guard_marker(entry)) categories |= PAGE_IS_GUARD; else if ((p->masks_of_interest & PAGE_IS_FILE) && - is_pfn_swap_entry(swp) && - !folio_test_anon(pfn_swap_entry_folio(swp))) + softleaf_has_pfn(entry) && + !folio_test_anon(softleaf_to_folio(entry))) categories |= PAGE_IS_FILE; if (pte_swp_soft_dirty(pte)) @@ -2466,7 +2466,7 @@ static void make_uffd_wp_huge_pte(struct vm_area_struct *vma, { unsigned long psize; - if (is_hugetlb_entry_hwpoisoned(ptent) || is_pte_marker(ptent)) + if (is_hugetlb_entry_hwpoisoned(ptent) || pte_is_marker(ptent)) return; psize = huge_page_size(hstate_vma(vma)); diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 04c66b5001d5..e33e7df36927 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include @@ -251,7 +251,7 @@ static inline bool userfaultfd_huge_must_wait(struct userfaultfd_ctx *ctx, if (huge_pte_none(pte)) return true; /* UFFD PTE markers require handling. */ - if (is_uffd_pte_marker(pte)) + if (pte_is_uffd_marker(pte)) return true; /* If VMA has UFFD WP faults enabled and WP fault, wait for handler. */ if (!huge_pte_write(pte) && (reason & VM_UFFD_WP)) @@ -330,7 +330,7 @@ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx, if (pte_none(ptent)) goto out; /* UFFD PTE markers require handling. */ - if (is_uffd_pte_marker(ptent)) + if (pte_is_uffd_marker(ptent)) goto out; /* If VMA has UFFD WP faults enabled and WP fault, wait for handler. */ if (!pte_write(ptent) && (reason & VM_UFFD_WP)) diff --git a/include/linux/leafops.h b/include/linux/leafops.h new file mode 100644 index 000000000000..1376589d94b0 --- /dev/null +++ b/include/linux/leafops.h @@ -0,0 +1,382 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Describes operations that can be performed on software-defined page table + * leaf entries. These are abstracted from the hardware page table entries + * themselves by the softleaf_t type, see mm_types.h. + */ +#ifndef _LINUX_LEAFOPS_H +#define _LINUX_LEAFOPS_H + +#include +#include +#include + +#ifdef CONFIG_MMU + +/* Temporary until swp_entry_t eliminated. */ +#define LEAF_TYPE_SHIFT SWP_TYPE_SHIFT + +enum softleaf_type { + /* Fundamental types. */ + SOFTLEAF_NONE, + SOFTLEAF_SWAP, + /* Migration types. */ + SOFTLEAF_MIGRATION_READ, + SOFTLEAF_MIGRATION_READ_EXCLUSIVE, + SOFTLEAF_MIGRATION_WRITE, + /* Device types. */ + SOFTLEAF_DEVICE_PRIVATE_READ, + SOFTLEAF_DEVICE_PRIVATE_WRITE, + SOFTLEAF_DEVICE_EXCLUSIVE, + /* H/W posion types. */ + SOFTLEAF_HWPOISON, + /* Marker types. */ + SOFTLEAF_MARKER, +}; + +/** + * softleaf_mk_none() - Create an empty ('none') leaf entry. + * Returns: empty leaf entry. + */ +static inline softleaf_t softleaf_mk_none(void) +{ + return ((softleaf_t) { 0 }); +} + +/** + * softleaf_from_pte() - Obtain a leaf entry from a PTE entry. + * @pte: PTE entry. + * + * If @pte is present (therefore not a leaf entry) the function returns an empty + * leaf entry. Otherwise, it returns a leaf entry. + * + * Returns: Leaf entry. + */ +static inline softleaf_t softleaf_from_pte(pte_t pte) +{ + if (pte_present(pte)) + return softleaf_mk_none(); + + /* Temporary until swp_entry_t eliminated. 
*/ + return pte_to_swp_entry(pte); +} + +/** + * softleaf_is_none() - Is the leaf entry empty? + * @entry: Leaf entry. + * + * Empty entries are typically the result of a 'none' page table leaf entry + * being converted to a leaf entry. + * + * Returns: true if the entry is empty, false otherwise. + */ +static inline bool softleaf_is_none(softleaf_t entry) +{ + return entry.val == 0; +} + +/** + * softleaf_type() - Identify the type of leaf entry. + * @enntry: Leaf entry. + * + * Returns: the leaf entry type associated with @entry. + */ +static inline enum softleaf_type softleaf_type(softleaf_t entry) +{ + unsigned int type_num; + + if (softleaf_is_none(entry)) + return SOFTLEAF_NONE; + + type_num = entry.val >> LEAF_TYPE_SHIFT; + + if (type_num < MAX_SWAPFILES) + return SOFTLEAF_SWAP; + + switch (type_num) { +#ifdef CONFIG_MIGRATION + case SWP_MIGRATION_READ: + return SOFTLEAF_MIGRATION_READ; + case SWP_MIGRATION_READ_EXCLUSIVE: + return SOFTLEAF_MIGRATION_READ_EXCLUSIVE; + case SWP_MIGRATION_WRITE: + return SOFTLEAF_MIGRATION_WRITE; +#endif +#ifdef CONFIG_DEVICE_PRIVATE + case SWP_DEVICE_WRITE: + return SOFTLEAF_DEVICE_PRIVATE_WRITE; + case SWP_DEVICE_READ: + return SOFTLEAF_DEVICE_PRIVATE_READ; + case SWP_DEVICE_EXCLUSIVE: + return SOFTLEAF_DEVICE_EXCLUSIVE; +#endif +#ifdef CONFIG_MEMORY_FAILURE + case SWP_HWPOISON: + return SOFTLEAF_HWPOISON; +#endif + case SWP_PTE_MARKER: + return SOFTLEAF_MARKER; + } + + /* Unknown entry type. */ + VM_WARN_ON_ONCE(1); + return SOFTLEAF_NONE; +} + +/** + * softleaf_is_swap() - Is this leaf entry a swap entry? + * @entry: Leaf entry. + * + * Returns: true if the leaf entry is a swap entry, otherwise false. + */ +static inline bool softleaf_is_swap(softleaf_t entry) +{ + return softleaf_type(entry) == SOFTLEAF_SWAP; +} + +/** + * softleaf_is_swap() - Is this leaf entry a migration entry? + * @entry: Leaf entry. + * + * Returns: true if the leaf entry is a migration entry, otherwise false. + */ +static inline bool softleaf_is_migration(softleaf_t entry) +{ + switch (softleaf_type(entry)) { + case SOFTLEAF_MIGRATION_READ: + case SOFTLEAF_MIGRATION_READ_EXCLUSIVE: + case SOFTLEAF_MIGRATION_WRITE: + return true; + default: + return false; + } +} + +/** + * softleaf_is_device_private() - Is this leaf entry a device private entry? + * @entry: Leaf entry. + * + * Returns: true if the leaf entry is a device private entry, otherwise false. + */ +static inline bool softleaf_is_device_private(softleaf_t entry) +{ + switch (softleaf_type(entry)) { + case SOFTLEAF_DEVICE_PRIVATE_WRITE: + case SOFTLEAF_DEVICE_PRIVATE_READ: + return true; + default: + return false; + } +} + +static inline bool softleaf_is_device_exclusive(softleaf_t entry) +{ + return softleaf_type(entry) == SOFTLEAF_DEVICE_EXCLUSIVE; +} + +/** + * softleaf_is_hwpoison() - Is this leaf entry a hardware poison entry? + * @entry: Leaf entry. + * + * Returns: true if the leaf entry is a hardware poison entry, otherwise false. + */ +static inline bool softleaf_is_hwpoison(softleaf_t entry) +{ + return softleaf_type(entry) == SOFTLEAF_HWPOISON; +} + +/** + * softleaf_is_marker() - Is this leaf entry a marker? + * @entry: Leaf entry. + * + * Returns: true if the leaf entry is a marker entry, otherwise false. + */ +static inline bool softleaf_is_marker(softleaf_t entry) +{ + return softleaf_type(entry) == SOFTLEAF_MARKER; +} + +/** + * softleaf_to_marker() - Obtain marker associated with leaf entry. + * @entry: Leaf entry, softleaf_is_marker(@entry) must return true. 
+ * + * Returns: Marker associated with the leaf entry. + */ +static inline pte_marker softleaf_to_marker(softleaf_t entry) +{ + VM_WARN_ON_ONCE(!softleaf_is_marker(entry)); + + return swp_offset(entry) & PTE_MARKER_MASK; +} + +/** + * softleaf_has_pfn() - Does this leaf entry encode a valid PFN number? + * @entry: Leaf entry. + * + * A pfn swap entry is a special type of swap entry that always has a pfn stored + * in the swap offset. They can either be used to represent unaddressable device + * memory, to restrict access to a page undergoing migration or to represent a + * pfn which has been hwpoisoned and unmapped. + * + * Returns: true if the leaf entry encodes a PFN, otherwise false. + */ +static inline bool softleaf_has_pfn(softleaf_t entry) +{ + /* Make sure the swp offset can always store the needed fields. */ + BUILD_BUG_ON(SWP_TYPE_SHIFT < SWP_PFN_BITS); + + if (softleaf_is_migration(entry)) + return true; + if (softleaf_is_device_private(entry)) + return true; + if (softleaf_is_device_exclusive(entry)) + return true; + if (softleaf_is_hwpoison(entry)) + return true; + + return false; +} + +/** + * softleaf_to_pfn() - Obtain PFN encoded within leaf entry. + * @entry: Leaf entry, softleaf_has_pfn(@entry) must return true. + * + * Returns: The PFN associated with the leaf entry. + */ +static inline unsigned long softleaf_to_pfn(softleaf_t entry) +{ + VM_WARN_ON_ONCE(!softleaf_has_pfn(entry)); + + /* Temporary until swp_entry_t eliminated. */ + return swp_offset_pfn(entry); +} + +/** + * softleaf_to_page() - Obtains struct page for PFN encoded within leaf entry. + * @entry: Leaf entry, softleaf_has_pfn(@entry) must return true. + * + * Returns: Pointer to the struct page associated with the leaf entry's PFN. + */ +static inline struct page *softleaf_to_page(softleaf_t entry) +{ + VM_WARN_ON_ONCE(!softleaf_has_pfn(entry)); + + /* Temporary until swp_entry_t eliminated. */ + return pfn_swap_entry_to_page(entry); +} + +/** + * softleaf_to_folio() - Obtains struct folio for PFN encoded within leaf entry. + * @entry: Leaf entry, softleaf_has_pfn(@entry) must return true. + * + * Returns: Pointer to the struct folio associated with the leaf entry's PFN. + * Returns: + */ +static inline struct folio *softleaf_to_folio(softleaf_t entry) +{ + VM_WARN_ON_ONCE(!softleaf_has_pfn(entry)); + + /* Temporary until swp_entry_t eliminated. */ + return pfn_swap_entry_folio(entry); +} + +/** + * softleaf_is_poison_marker() - Is this leaf entry a poison marker? + * @entry: Leaf entry. + * + * The poison marker is set via UFFDIO_POISON. Userfaultfd-specific. + * + * Returns: true if the leaf entry is a poison marker, otherwise false. + */ +static inline bool softleaf_is_poison_marker(softleaf_t entry) +{ + if (!softleaf_is_marker(entry)) + return false; + + return softleaf_to_marker(entry) & PTE_MARKER_POISONED; +} + +/** + * softleaf_is_guard_marker() - Is this leaf entry a guard region marker? + * @entry: Leaf entry. + * + * Returns: true if the leaf entry is a guard marker, otherwise false. + */ +static inline bool softleaf_is_guard_marker(softleaf_t entry) +{ + if (!softleaf_is_marker(entry)) + return false; + + return softleaf_to_marker(entry) & PTE_MARKER_GUARD; +} + +/** + * softleaf_is_uffd_wp_marker() - Is this leaf entry a userfautlfd write protect + * marker? + * @entry: Leaf entry. + * + * Userfaultfd-specific. + * + * Returns: true if the leaf entry is a UFFD WP marker, otherwise false. 
+ */ +static inline bool softleaf_is_uffd_wp_marker(softleaf_t entry) +{ + if (!softleaf_is_marker(entry)) + return false; + + return softleaf_to_marker(entry) & PTE_MARKER_UFFD_WP; +} + +/** + * pte_is_marker() - Does the PTE entry encode a marker leaf entry? + * @pte: PTE entry. + * + * Returns: true if this PTE is a marker leaf entry, otherwise false. + */ +static inline bool pte_is_marker(pte_t pte) +{ + return softleaf_is_marker(softleaf_from_pte(pte)); +} + +/** + * pte_is_uffd_wp_marker() - Does this PTE entry encode a userfaultfd write + * protect marker leaf entry? + * @pte: PTE entry. + * + * Returns: true if this PTE is a UFFD WP marker leaf entry, otherwise false. + */ +static inline bool pte_is_uffd_wp_marker(pte_t pte) +{ + const softleaf_t entry = softleaf_from_pte(pte); + + return softleaf_is_uffd_wp_marker(entry); +} + +/** + * pte_is_uffd_marker() - Does this PTE entry encode a userfault-specific marker + * leaf entry? + * @entry: Leaf entry. + * + * It's useful to be able to determine which leaf entries encode UFFD-specific + * markers so we can handle these correctly. + * + * Returns: true if this PTE entry is a UFFD-specific marker, otherwise false. + */ +static inline bool pte_is_uffd_marker(pte_t pte) +{ + const softleaf_t entry = softleaf_from_pte(pte); + + if (!softleaf_is_marker(entry)) + return false; + + /* UFFD WP, poisoned swap entries are UFFD-handled. */ + if (softleaf_is_uffd_wp_marker(entry)) + return true; + if (softleaf_is_poison_marker(entry)) + return true; + + return false; +} + +#endif /* CONFIG_MMU */ +#endif /* _LINUX_SWAPOPS_H */ diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index f6a2b2d20016..ca7a18351797 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -8,7 +8,7 @@ #include #include #include -#include +#include /** * folio_is_file_lru - Should the folio be on a file LRU or anon LRU? @@ -541,9 +541,9 @@ static inline bool mm_tlb_flush_nested(const struct mm_struct *mm) * The caller should insert a new pte created with make_pte_marker(). */ static inline pte_marker copy_pte_marker( - swp_entry_t entry, struct vm_area_struct *dst_vma) + softleaf_t entry, struct vm_area_struct *dst_vma) { - pte_marker srcm = pte_marker_get(entry); + const pte_marker srcm = softleaf_to_marker(entry); /* Always copy error entries. */ pte_marker dstm = srcm & (PTE_MARKER_POISONED | PTE_MARKER_GUARD); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 5021047485a9..4f66a3206a63 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -285,6 +285,31 @@ typedef struct { unsigned long val; } swp_entry_t; +/** + * typedef softleaf_t - Describes a page table software leaf entry, abstracted + * from its architecture-specific encoding. + * + * Page table leaf entries are those which do not reference any descendent page + * tables but rather either reference a data page, are an empty (or 'none' + * entry), or contain a non-present entry. + * + * If referencing another page table or a data page then the page table entry is + * pertinent to hardware - that is it tells the hardware how to decode the page + * table entry. + * + * Otherwise it is a software-defined leaf page table entry, which this type + * describes. See leafops.h and specifically @softleaf_type for a list of all + * possible kinds of software leaf entry. + * + * A softleaf_t entry is abstracted from the hardware page table entry, so is + * not architecture-specific. 
+ * + * NOTE: While we transition from the confusing swp_entry_t type used for this + * purpose, we simply alias this type. This will be removed once the + * transition is complete. + */ +typedef swp_entry_t softleaf_t; + #if defined(CONFIG_MEMCG) || defined(CONFIG_SLAB_OBJ_EXT) /* We have some extra room after the refcount in tail pages. */ #define NR_PAGES_IN_LARGE_FOLIO diff --git a/include/linux/swapops.h b/include/linux/swapops.h index d1f665935cfc..0a4b3f51ecf5 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -426,21 +426,6 @@ static inline swp_entry_t make_pte_marker_entry(pte_marker marker) return swp_entry(SWP_PTE_MARKER, marker); } -static inline bool is_pte_marker_entry(swp_entry_t entry) -{ - return swp_type(entry) == SWP_PTE_MARKER; -} - -static inline pte_marker pte_marker_get(swp_entry_t entry) -{ - return swp_offset(entry) & PTE_MARKER_MASK; -} - -static inline bool is_pte_marker(pte_t pte) -{ - return is_swap_pte(pte) && is_pte_marker_entry(pte_to_swp_entry(pte)); -} - static inline pte_t make_pte_marker(pte_marker marker) { return swp_entry_to_pte(make_pte_marker_entry(marker)); @@ -451,24 +436,11 @@ static inline swp_entry_t make_poisoned_swp_entry(void) return make_pte_marker_entry(PTE_MARKER_POISONED); } -static inline int is_poisoned_swp_entry(swp_entry_t entry) -{ - return is_pte_marker_entry(entry) && - (pte_marker_get(entry) & PTE_MARKER_POISONED); - -} - static inline swp_entry_t make_guard_swp_entry(void) { return make_pte_marker_entry(PTE_MARKER_GUARD); } -static inline int is_guard_swp_entry(swp_entry_t entry) -{ - return is_pte_marker_entry(entry) && - (pte_marker_get(entry) & PTE_MARKER_GUARD); -} - static inline struct page *pfn_swap_entry_to_page(swp_entry_t entry) { struct page *p = pfn_to_page(swp_offset_pfn(entry)); diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index da0b4fcc566f..983c860a00f1 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include @@ -434,32 +434,6 @@ static inline bool userfaultfd_wp_use_markers(struct vm_area_struct *vma) return userfaultfd_wp_unpopulated(vma); } -static inline bool pte_marker_entry_uffd_wp(swp_entry_t entry) -{ -#ifdef CONFIG_PTE_MARKER_UFFD_WP - return is_pte_marker_entry(entry) && - (pte_marker_get(entry) & PTE_MARKER_UFFD_WP); -#else - return false; -#endif -} - -static inline bool pte_marker_uffd_wp(pte_t pte) -{ -#ifdef CONFIG_PTE_MARKER_UFFD_WP - swp_entry_t entry; - - if (!is_swap_pte(pte)) - return false; - - entry = pte_to_swp_entry(pte); - - return pte_marker_entry_uffd_wp(entry); -#else - return false; -#endif -} - /* * Returns true if this is a swap pte and was uffd-wp wr-protected in either * forms (pte marker or a normal swap pte), false otherwise. @@ -473,31 +447,10 @@ static inline bool pte_swp_uffd_wp_any(pte_t pte) if (pte_swp_uffd_wp(pte)) return true; - if (pte_marker_uffd_wp(pte)) + if (pte_is_uffd_wp_marker(pte)) return true; #endif return false; } - -static inline bool is_uffd_pte_marker(pte_t pte) -{ - swp_entry_t entry; - - if (pte_present(pte)) - return false; - - entry = pte_to_swp_entry(pte); - if (!is_pte_marker_entry(entry)) - return false; - - /* UFFD WP, poisoned swap entries are UFFD handled. 
*/ - if (pte_marker_entry_uffd_wp(entry)) - return true; - if (is_poisoned_swp_entry(entry)) - return true; - - return false; -} - #endif /* _LINUX_USERFAULTFD_K_H */ diff --git a/mm/hmm.c b/mm/hmm.c index 43d4a91035ff..b11b4ebba945 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -244,7 +244,7 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr, uint64_t pfn_req_flags = *hmm_pfn; uint64_t new_pfn_flags = 0; - if (pte_none(pte) || pte_marker_uffd_wp(pte)) { + if (pte_none(pte) || pte_is_uffd_wp_marker(pte)) { required_fault = hmm_pte_need_fault(hmm_vma_walk, pfn_req_flags, 0); if (required_fault) diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 01c784547d1e..a05edefec1ca 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include #include #include @@ -5662,17 +5662,17 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, entry = huge_pte_clear_uffd_wp(entry); set_huge_pte_at(dst, addr, dst_pte, entry, sz); } else if (unlikely(is_hugetlb_entry_migration(entry))) { - swp_entry_t swp_entry = pte_to_swp_entry(entry); + softleaf_t softleaf = softleaf_from_pte(entry); bool uffd_wp = pte_swp_uffd_wp(entry); - if (!is_readable_migration_entry(swp_entry) && cow) { + if (!is_readable_migration_entry(softleaf) && cow) { /* * COW mappings require pages in both * parent and child to be set to read. */ - swp_entry = make_readable_migration_entry( - swp_offset(swp_entry)); - entry = swp_entry_to_pte(swp_entry); + softleaf = make_readable_migration_entry( + swp_offset(softleaf)); + entry = swp_entry_to_pte(softleaf); if (userfaultfd_wp(src_vma) && uffd_wp) entry = pte_swp_mkuffd_wp(entry); set_huge_pte_at(src, addr, src_pte, entry, sz); @@ -5680,9 +5680,9 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, if (!userfaultfd_wp(dst_vma)) entry = huge_pte_clear_uffd_wp(entry); set_huge_pte_at(dst, addr, dst_pte, entry, sz); - } else if (unlikely(is_pte_marker(entry))) { - pte_marker marker = copy_pte_marker( - pte_to_swp_entry(entry), dst_vma); + } else if (unlikely(pte_is_marker(entry))) { + const softleaf_t softleaf = softleaf_from_pte(entry); + const pte_marker marker = copy_pte_marker(softleaf, dst_vma); if (marker) set_huge_pte_at(dst, addr, dst_pte, @@ -5798,7 +5798,7 @@ static void move_huge_pte(struct vm_area_struct *vma, unsigned long old_addr, pte = huge_ptep_get_and_clear(mm, old_addr, src_pte, sz); - if (need_clear_uffd_wp && pte_marker_uffd_wp(pte)) + if (need_clear_uffd_wp && pte_is_uffd_wp_marker(pte)) huge_pte_clear(mm, new_addr, dst_pte, sz); else { if (need_clear_uffd_wp) { @@ -6617,7 +6617,7 @@ static vm_fault_t hugetlb_no_page(struct address_space *mapping, * If this pte was previously wr-protected, keep it wr-protected even * if populated. 
*/ - if (unlikely(pte_marker_uffd_wp(vmf->orig_pte))) + if (unlikely(pte_is_uffd_wp_marker(vmf->orig_pte))) new_pte = huge_pte_mkuffd_wp(new_pte); set_huge_pte_at(mm, vmf->address, vmf->pte, new_pte, huge_page_size(h)); @@ -6750,9 +6750,9 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, */ return hugetlb_no_page(mapping, &vmf); - if (is_pte_marker(vmf.orig_pte)) { + if (pte_is_marker(vmf.orig_pte)) { const pte_marker marker = - pte_marker_get(pte_to_swp_entry(vmf.orig_pte)); + softleaf_to_marker(softleaf_from_pte(vmf.orig_pte)); if (marker & PTE_MARKER_POISONED) { ret = VM_FAULT_HWPOISON_LARGE | @@ -7080,7 +7080,7 @@ int hugetlb_mfill_atomic_pte(pte_t *dst_pte, * See comment about UFFD marker overwriting in * mfill_atomic_install_pte(). */ - if (!huge_pte_none(dst_ptep) && !is_uffd_pte_marker(dst_ptep)) + if (!huge_pte_none(dst_ptep) && !pte_is_uffd_marker(dst_ptep)) goto out_release_unlock; if (folio_in_pagecache) @@ -7201,8 +7201,9 @@ long hugetlb_change_protection(struct vm_area_struct *vma, if (unlikely(is_hugetlb_entry_hwpoisoned(pte))) { /* Nothing to do. */ } else if (unlikely(is_hugetlb_entry_migration(pte))) { - swp_entry_t entry = pte_to_swp_entry(pte); - struct folio *folio = pfn_swap_entry_folio(entry); + softleaf_t entry = softleaf_from_pte(pte); + + struct folio *folio = softleaf_to_folio(entry); pte_t newpte = pte; if (is_writable_migration_entry(entry)) { @@ -7222,14 +7223,14 @@ long hugetlb_change_protection(struct vm_area_struct *vma, newpte = pte_swp_clear_uffd_wp(newpte); if (!pte_same(pte, newpte)) set_huge_pte_at(mm, address, ptep, newpte, psize); - } else if (unlikely(is_pte_marker(pte))) { + } else if (unlikely(pte_is_marker(pte))) { /* * Do nothing on a poison marker; page is * corrupted, permissions do not apply. Here * pte_marker_uffd_wp()==true implies !poison * because they're mutual exclusive. */ - if (pte_marker_uffd_wp(pte) && uffd_wp_resolve) + if (pte_is_uffd_wp_marker(pte) && uffd_wp_resolve) /* Safe to modify directly (non-present->none). */ huge_pte_clear(mm, address, ptep, psize); } else if (!huge_pte_none(pte)) { diff --git a/mm/madvise.c b/mm/madvise.c index fb1c86e630b6..2d5ad3cb37bb 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include @@ -690,17 +690,16 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr, * (page allocation + zeroing). 
*/ if (!pte_present(ptent)) { - swp_entry_t entry; + softleaf_t entry = softleaf_from_pte(ptent); - entry = pte_to_swp_entry(ptent); - if (!non_swap_entry(entry)) { + if (softleaf_is_swap(entry)) { max_nr = (end - addr) / PAGE_SIZE; nr = swap_pte_batch(pte, max_nr, ptent); nr_swap -= nr; free_swap_and_cache_nr(entry, nr); clear_not_present_full_ptes(mm, addr, pte, nr, tlb->fullmm); - } else if (is_hwpoison_entry(entry) || - is_poisoned_swp_entry(entry)) { + } else if (softleaf_is_hwpoison(entry) || + softleaf_is_poison_marker(entry)) { pte_clear_not_present_full(mm, addr, pte, tlb->fullmm); } continue; @@ -1071,8 +1070,9 @@ static bool is_valid_guard_vma(struct vm_area_struct *vma, bool allow_locked) static bool is_guard_pte_marker(pte_t ptent) { - return is_swap_pte(ptent) && - is_guard_swp_entry(pte_to_swp_entry(ptent)); + const softleaf_t entry = softleaf_from_pte(ptent); + + return softleaf_is_guard_marker(entry); } static int guard_install_pud_entry(pud_t *pud, unsigned long addr, diff --git a/mm/memory.c b/mm/memory.c index 4c3a7e09a159..7493ed084b99 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -60,7 +60,7 @@ #include #include #include -#include +#include #include #include #include @@ -109,7 +109,7 @@ static __always_inline bool vmf_orig_pte_uffd_wp(struct vm_fault *vmf) if (!(vmf->flags & FAULT_FLAG_ORIG_PTE_VALID)) return false; - return pte_marker_uffd_wp(vmf->orig_pte); + return pte_is_uffd_wp_marker(vmf->orig_pte); } /* @@ -927,10 +927,10 @@ copy_nonpresent_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, { vm_flags_t vm_flags = dst_vma->vm_flags; pte_t orig_pte = ptep_get(src_pte); + softleaf_t entry = softleaf_from_pte(orig_pte); pte_t pte = orig_pte; struct folio *folio; struct page *page; - swp_entry_t entry = pte_to_swp_entry(orig_pte); if (likely(!non_swap_entry(entry))) { if (swap_duplicate(entry) < 0) @@ -1016,7 +1016,7 @@ copy_nonpresent_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, if (try_restore_exclusive_pte(src_vma, addr, src_pte, orig_pte)) return -EBUSY; return -ENOENT; - } else if (is_pte_marker_entry(entry)) { + } else if (softleaf_is_marker(entry)) { pte_marker marker = copy_pte_marker(entry, dst_vma); if (marker) @@ -1717,14 +1717,14 @@ static inline int zap_nonpresent_ptes(struct mmu_gather *tlb, unsigned int max_nr, unsigned long addr, struct zap_details *details, int *rss, bool *any_skipped) { - swp_entry_t entry; + softleaf_t entry; int nr = 1; *any_skipped = true; - entry = pte_to_swp_entry(ptent); - if (is_device_private_entry(entry) || - is_device_exclusive_entry(entry)) { - struct page *page = pfn_swap_entry_to_page(entry); + entry = softleaf_from_pte(ptent); + if (softleaf_is_device_private(entry) || + softleaf_is_device_exclusive(entry)) { + struct page *page = softleaf_to_page(entry); struct folio *folio = page_folio(page); if (unlikely(!should_zap_folio(details, folio))) @@ -1739,7 +1739,7 @@ static inline int zap_nonpresent_ptes(struct mmu_gather *tlb, rss[mm_counter(folio)]--; folio_remove_rmap_pte(folio, page, vma); folio_put(folio); - } else if (!non_swap_entry(entry)) { + } else if (softleaf_is_swap(entry)) { /* Genuine swap entries, hence a private anon pages */ if (!should_zap_cows(details)) return 1; @@ -1747,20 +1747,20 @@ static inline int zap_nonpresent_ptes(struct mmu_gather *tlb, nr = swap_pte_batch(pte, max_nr, ptent); rss[MM_SWAPENTS] -= nr; free_swap_and_cache_nr(entry, nr); - } else if (is_migration_entry(entry)) { - struct folio *folio = pfn_swap_entry_folio(entry); + } else if (softleaf_is_migration(entry)) { + 
struct folio *folio = softleaf_to_folio(entry); if (!should_zap_folio(details, folio)) return 1; rss[mm_counter(folio)]--; - } else if (pte_marker_entry_uffd_wp(entry)) { + } else if (softleaf_is_uffd_wp_marker(entry)) { /* * For anon: always drop the marker; for file: only * drop the marker if explicitly requested. */ if (!vma_is_anonymous(vma) && !zap_drop_markers(details)) return 1; - } else if (is_guard_swp_entry(entry)) { + } else if (softleaf_is_guard_marker(entry)) { /* * Ordinary zapping should not remove guard PTE * markers. Only do so if we should remove PTE markers @@ -1768,7 +1768,8 @@ static inline int zap_nonpresent_ptes(struct mmu_gather *tlb, */ if (!zap_drop_markers(details)) return 1; - } else if (is_hwpoison_entry(entry) || is_poisoned_swp_entry(entry)) { + } else if (softleaf_is_hwpoison(entry) || + softleaf_is_poison_marker(entry)) { if (!should_zap_cows(details)) return 1; } else { @@ -4390,7 +4391,7 @@ static vm_fault_t pte_marker_clear(struct vm_fault *vmf) * * This should also cover the case where e.g. the pte changed * quickly from a PTE_MARKER_UFFD_WP into PTE_MARKER_POISONED. - * So is_pte_marker() check is not enough to safely drop the pte. + * So pte_is_marker() check is not enough to safely drop the pte. */ if (pte_same(vmf->orig_pte, ptep_get(vmf->pte))) pte_clear(vmf->vma->vm_mm, vmf->address, vmf->pte); @@ -4424,8 +4425,8 @@ static vm_fault_t pte_marker_handle_uffd_wp(struct vm_fault *vmf) static vm_fault_t handle_pte_marker(struct vm_fault *vmf) { - swp_entry_t entry = pte_to_swp_entry(vmf->orig_pte); - unsigned long marker = pte_marker_get(entry); + const softleaf_t entry = softleaf_from_pte(vmf->orig_pte); + const pte_marker marker = softleaf_to_marker(entry); /* * PTE markers should never be empty. If anything weird happened, @@ -4442,7 +4443,7 @@ static vm_fault_t handle_pte_marker(struct vm_fault *vmf) if (marker & PTE_MARKER_GUARD) return VM_FAULT_SIGSEGV; - if (pte_marker_entry_uffd_wp(entry)) + if (softleaf_is_uffd_wp_marker(entry)) return pte_marker_handle_uffd_wp(vmf); /* This is an unknown pte marker */ @@ -4690,7 +4691,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) } } else if (is_hwpoison_entry(entry)) { ret = VM_FAULT_HWPOISON; - } else if (is_pte_marker_entry(entry)) { + } else if (softleaf_is_marker(entry)) { ret = handle_pte_marker(vmf); } else { print_bad_pte(vma, vmf->address, vmf->orig_pte, NULL); diff --git a/mm/mincore.c b/mm/mincore.c index 151b2dbb783b..e77c5bc88fc7 100644 --- a/mm/mincore.c +++ b/mm/mincore.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include @@ -42,7 +42,7 @@ static int mincore_hugetlb(pte_t *pte, unsigned long hmask, unsigned long addr, } else { const pte_t ptep = huge_ptep_get(walk->mm, addr, pte); - if (huge_pte_none(ptep) || is_pte_marker(ptep)) + if (huge_pte_none(ptep) || pte_is_marker(ptep)) present = 0; else present = 1; @@ -187,7 +187,7 @@ static int mincore_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, step = 1; /* We need to do cache lookup too for UFFD pte markers */ - if (pte_none(pte) || is_uffd_pte_marker(pte)) + if (pte_none(pte) || pte_is_uffd_marker(pte)) __mincore_unmapped_range(addr, addr + PAGE_SIZE, vma, vec); else if (pte_present(pte)) { diff --git a/mm/mprotect.c b/mm/mprotect.c index ab4e06cd9a69..0bae241eb7aa 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -326,14 +326,14 @@ static long change_pte_range(struct mmu_gather *tlb, newpte = swp_entry_to_pte(entry); if (pte_swp_uffd_wp(oldpte)) newpte = pte_swp_mkuffd_wp(newpte); - 
} else if (is_pte_marker_entry(entry)) { + } else if (softleaf_is_marker(entry)) { /* * Ignore error swap entries unconditionally, * because any access should sigbus/sigsegv * anyway. */ - if (is_poisoned_swp_entry(entry) || - is_guard_swp_entry(entry)) + if (softleaf_is_poison_marker(entry) || + softleaf_is_guard_marker(entry)) continue; /* * If this is uffd-wp pte marker and we'd like diff --git a/mm/mremap.c b/mm/mremap.c index 8ad06cf50783..7c21b2ad13f6 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include #include @@ -288,7 +288,7 @@ static int move_ptes(struct pagetable_move_control *pmc, pte = move_pte(pte, old_addr, new_addr); pte = move_soft_dirty_pte(pte); - if (need_clear_uffd_wp && pte_marker_uffd_wp(pte)) + if (need_clear_uffd_wp && pte_is_uffd_wp_marker(pte)) pte_clear(mm, new_addr, new_ptep); else { if (need_clear_uffd_wp) { diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c index 137ce27ff68c..be20468fb5a9 100644 --- a/mm/page_vma_mapped.c +++ b/mm/page_vma_mapped.c @@ -3,7 +3,7 @@ #include #include #include -#include +#include #include "internal.h" @@ -107,15 +107,12 @@ static bool check_pte(struct page_vma_mapped_walk *pvmw, unsigned long pte_nr) pte_t ptent = ptep_get(pvmw->pte); if (pvmw->flags & PVMW_MIGRATION) { - swp_entry_t entry; - if (!is_swap_pte(ptent)) - return false; - entry = pte_to_swp_entry(ptent); + const softleaf_t entry = softleaf_from_pte(ptent); - if (!is_migration_entry(entry)) + if (!softleaf_is_migration(entry)) return false; - pfn = swp_offset_pfn(entry); + pfn = softleaf_to_pfn(entry); } else if (is_swap_pte(ptent)) { swp_entry_t entry; diff --git a/mm/shmem.c b/mm/shmem.c index 6580f3cd24bb..395ca58ac4a5 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -66,7 +66,7 @@ static struct vfsmount *shm_mnt __ro_after_init; #include #include #include -#include +#include #include #include #include @@ -2286,7 +2286,8 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, struct address_space *mapping = inode->i_mapping; struct mm_struct *fault_mm = vma ? vma->vm_mm : NULL; struct shmem_inode_info *info = SHMEM_I(inode); - swp_entry_t swap, index_entry; + swp_entry_t swap; + softleaf_t index_entry; struct swap_info_struct *si; struct folio *folio = NULL; bool skip_swapcache = false; @@ -2298,7 +2299,7 @@ static int shmem_swapin_folio(struct inode *inode, pgoff_t index, swap = index_entry; *foliop = NULL; - if (is_poisoned_swp_entry(index_entry)) + if (softleaf_is_poison_marker(index_entry)) return -EIO; si = get_swap_device(index_entry); diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index cc4ce205bbec..055ec1050776 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include #include @@ -208,7 +208,7 @@ int mfill_atomic_install_pte(pmd_t *dst_pmd, * MISSING|WP registered, we firstly wr-protect a none pte which has no * page cache page backing it, then access the page. 
*/ - if (!pte_none(dst_ptep) && !is_uffd_pte_marker(dst_ptep)) + if (!pte_none(dst_ptep) && !pte_is_uffd_marker(dst_ptep)) goto out_unlock; if (page_in_cache) { @@ -590,7 +590,7 @@ static __always_inline ssize_t mfill_atomic_hugetlb( if (!uffd_flags_mode_is(flags, MFILL_ATOMIC_CONTINUE)) { const pte_t ptep = huge_ptep_get(dst_mm, dst_addr, dst_pte); - if (!huge_pte_none(ptep) && !is_uffd_pte_marker(ptep)) { + if (!huge_pte_none(ptep) && !pte_is_uffd_marker(ptep)) { err = -EEXIST; hugetlb_vma_unlock_read(dst_vma); mutex_unlock(&hugetlb_fault_mutex_table[hash]); -- 2.51.0 There's an established convention in the kernel that we treat PTEs as containing swap entries (and the unfortunately named non-swap swap entries) should they be neither empty (i.e. pte_none() evaluating true) nor present (i.e. pte_present() evaluating true). However, there is some inconsistency in how this is applied, as we also have the is_swap_pte() helper which explicitly performs this check: /* check whether a pte points to a swap entry */ static inline int is_swap_pte(pte_t pte) { return !pte_none(pte) && !pte_present(pte); } As this represents a predicate, and it's logical to assume that in order to establish that a PTE entry can correctly be manipulated as a swap/non-swap entry, this predicate seems as if it must first be checked. But we instead, we far more often utilise the established convention of checking pte_none() / pte_present() before operating on entries as if they were swap/non-swap. This patch works towards correcting this inconsistency by removing all uses of is_swap_pte() where we are already in a position where we perform pte_none()/pte_present() checks anyway or otherwise it is clearly logical to do so. We also take advantage of the fact that pte_swp_uffd_wp() is only set on swap entries. Additionally, update comments referencing to is_swap_pte() and non_swap_entry(). No functional change intended. 
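For illustration only (this sketch is not part of the diff, and example_handle_softleaf() is a hypothetical stand-in for whatever a caller does with the non-present, non-none entry), the check pattern call sites use after this change is roughly:

	/*
	 * Sketch: distinguish the three leaf PTE states directly rather
	 * than via is_swap_pte().
	 */
	static void example_classify_pte(pte_t ptent)
	{
		if (pte_none(ptent))
			return;			/* Empty entry. */

		if (pte_present(ptent)) {
			/* Hardware-visible mapping. */
			return;
		}

		/* Everything else encodes a swap/non-swap (softleaf) entry. */
		example_handle_softleaf(pte_to_swp_entry(ptent));
	}

The converted call sites below (for example pte_to_pagemap_entry() and clear_soft_dirty()) follow this same ordering: handle the none case first, then the present case, and treat the remaining else branch as a swap/non-swap entry.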
Signed-off-by: Lorenzo Stoakes --- fs/proc/task_mmu.c | 49 ++++++++++++++++++++++++----------- include/linux/userfaultfd_k.h | 3 +-- mm/hugetlb.c | 6 ++--- mm/internal.h | 6 ++--- mm/khugepaged.c | 29 +++++++++++---------- mm/migrate.c | 2 +- mm/mprotect.c | 43 ++++++++++++++---------------- mm/mremap.c | 7 +++-- mm/page_table_check.c | 13 ++++++---- mm/page_vma_mapped.c | 31 +++++++++++----------- 10 files changed, 104 insertions(+), 85 deletions(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 24d26b49d870..ddbf177ecc45 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1017,7 +1017,9 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr, young = pte_young(ptent); dirty = pte_dirty(ptent); present = true; - } else if (is_swap_pte(ptent)) { + } else if (pte_none(ptent)) { + smaps_pte_hole_lookup(addr, walk); + } else { swp_entry_t swpent = pte_to_swp_entry(ptent); if (!non_swap_entry(swpent)) { @@ -1038,9 +1040,6 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr, present = true; page = pfn_swap_entry_to_page(swpent); } - } else { - smaps_pte_hole_lookup(addr, walk); - return; } if (!page) @@ -1611,6 +1610,9 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma, */ pte_t ptent = ptep_get(pte); + if (pte_none(ptent)) + return; + if (pte_present(ptent)) { pte_t old_pte; @@ -1620,7 +1622,7 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma, ptent = pte_wrprotect(old_pte); ptent = pte_clear_soft_dirty(ptent); ptep_modify_prot_commit(vma, addr, pte, old_pte, ptent); - } else if (is_swap_pte(ptent)) { + } else { ptent = pte_swp_clear_soft_dirty(ptent); set_pte_at(vma->vm_mm, addr, pte, ptent); } @@ -1923,6 +1925,9 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm, struct page *page = NULL; struct folio *folio; + if (pte_none(pte)) + goto out; + if (pte_present(pte)) { if (pm->show_pfn) frame = pte_pfn(pte); @@ -1932,8 +1937,9 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm, flags |= PM_SOFT_DIRTY; if (pte_uffd_wp(pte)) flags |= PM_UFFD_WP; - } else if (is_swap_pte(pte)) { + } else { swp_entry_t entry; + if (pte_swp_soft_dirty(pte)) flags |= PM_SOFT_DIRTY; if (pte_swp_uffd_wp(pte)) @@ -1941,6 +1947,7 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm, entry = pte_to_swp_entry(pte); if (pm->show_pfn) { pgoff_t offset; + /* * For PFN swap offsets, keeping the offset field * to be PFN only to be compatible with old smaps. 
@@ -1969,6 +1976,8 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm, __folio_page_mapped_exclusively(folio, page)) flags |= PM_MMAP_EXCLUSIVE; } + +out: if (vma->vm_flags & VM_SOFTDIRTY) flags |= PM_SOFT_DIRTY; @@ -2310,12 +2319,16 @@ static unsigned long pagemap_page_category(struct pagemap_scan_private *p, struct vm_area_struct *vma, unsigned long addr, pte_t pte) { - unsigned long categories = 0; + unsigned long categories; + + if (pte_none(pte)) + return 0; if (pte_present(pte)) { struct page *page; - categories |= PAGE_IS_PRESENT; + categories = PAGE_IS_PRESENT; + if (!pte_uffd_wp(pte)) categories |= PAGE_IS_WRITTEN; @@ -2329,10 +2342,11 @@ static unsigned long pagemap_page_category(struct pagemap_scan_private *p, categories |= PAGE_IS_PFNZERO; if (pte_soft_dirty(pte)) categories |= PAGE_IS_SOFT_DIRTY; - } else if (is_swap_pte(pte)) { + } else { softleaf_t entry; - categories |= PAGE_IS_SWAPPED; + categories = PAGE_IS_SWAPPED; + if (!pte_swp_uffd_wp_any(pte)) categories |= PAGE_IS_WRITTEN; @@ -2360,12 +2374,12 @@ static void make_uffd_wp_pte(struct vm_area_struct *vma, old_pte = ptep_modify_prot_start(vma, addr, pte); ptent = pte_mkuffd_wp(old_pte); ptep_modify_prot_commit(vma, addr, pte, old_pte, ptent); - } else if (is_swap_pte(ptent)) { - ptent = pte_swp_mkuffd_wp(ptent); - set_pte_at(vma->vm_mm, addr, pte, ptent); - } else { + } else if (pte_none(ptent)) { set_pte_at(vma->vm_mm, addr, pte, make_pte_marker(PTE_MARKER_UFFD_WP)); + } else { + ptent = pte_swp_mkuffd_wp(ptent); + set_pte_at(vma->vm_mm, addr, pte, ptent); } } @@ -2434,6 +2448,9 @@ static unsigned long pagemap_hugetlb_category(pte_t pte) { unsigned long categories = PAGE_IS_HUGE; + if (pte_none(pte)) + return categories; + /* * According to pagemap_hugetlb_range(), file-backed HugeTLB * page cannot be swapped. 
So PAGE_IS_FILE is not checked for @@ -2441,6 +2458,7 @@ static unsigned long pagemap_hugetlb_category(pte_t pte) */ if (pte_present(pte)) { categories |= PAGE_IS_PRESENT; + if (!huge_pte_uffd_wp(pte)) categories |= PAGE_IS_WRITTEN; if (!PageAnon(pte_page(pte))) @@ -2449,8 +2467,9 @@ static unsigned long pagemap_hugetlb_category(pte_t pte) categories |= PAGE_IS_PFNZERO; if (pte_soft_dirty(pte)) categories |= PAGE_IS_SOFT_DIRTY; - } else if (is_swap_pte(pte)) { + } else { categories |= PAGE_IS_SWAPPED; + if (!pte_swp_uffd_wp_any(pte)) categories |= PAGE_IS_WRITTEN; if (pte_swp_soft_dirty(pte)) diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index 983c860a00f1..96b089dff4ef 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -441,9 +441,8 @@ static inline bool userfaultfd_wp_use_markers(struct vm_area_struct *vma) static inline bool pte_swp_uffd_wp_any(pte_t pte) { #ifdef CONFIG_PTE_MARKER_UFFD_WP - if (!is_swap_pte(pte)) + if (pte_present(pte)) return false; - if (pte_swp_uffd_wp(pte)) return true; diff --git a/mm/hugetlb.c b/mm/hugetlb.c index a05edefec1ca..a74cde267c2a 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -5798,13 +5798,13 @@ static void move_huge_pte(struct vm_area_struct *vma, unsigned long old_addr, pte = huge_ptep_get_and_clear(mm, old_addr, src_pte, sz); - if (need_clear_uffd_wp && pte_is_uffd_wp_marker(pte)) + if (need_clear_uffd_wp && pte_is_uffd_wp_marker(pte)) { huge_pte_clear(mm, new_addr, dst_pte, sz); - else { + } else { if (need_clear_uffd_wp) { if (pte_present(pte)) pte = huge_pte_clear_uffd_wp(pte); - else if (is_swap_pte(pte)) + else pte = pte_swp_clear_uffd_wp(pte); } set_huge_pte_at(mm, new_addr, dst_pte, pte, sz); diff --git a/mm/internal.h b/mm/internal.h index 116a1ba85e66..9465129367a4 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -325,8 +325,7 @@ unsigned int folio_pte_batch(struct folio *folio, pte_t *ptep, pte_t pte, /** * pte_move_swp_offset - Move the swap entry offset field of a swap pte * forward or backward by delta - * @pte: The initial pte state; is_swap_pte(pte) must be true and - * non_swap_entry() must be false. + * @pte: The initial pte state; must be a swap entry * @delta: The direction and the offset we are moving; forward if delta * is positive; backward if delta is negative * @@ -352,8 +351,7 @@ static inline pte_t pte_move_swp_offset(pte_t pte, long delta) /** * pte_next_swp_offset - Increment the swap entry offset field of a swap pte. - * @pte: The initial pte state; is_swap_pte(pte) must be true and - * non_swap_entry() must be false. + * @pte: The initial pte state; must be a swap entry. * * Increments the swap offset, while maintaining all other fields, including * swap type, and any swp pte bits. The resulting pte is returned. 
diff --git a/mm/khugepaged.c b/mm/khugepaged.c index f6ed1072ed6e..a97ff7bcb232 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1019,7 +1019,8 @@ static int __collapse_huge_page_swapin(struct mm_struct *mm, } vmf.orig_pte = ptep_get_lockless(pte); - if (!is_swap_pte(vmf.orig_pte)) + if (pte_none(vmf.orig_pte) || + pte_present(vmf.orig_pte)) continue; vmf.pte = pte; @@ -1276,7 +1277,19 @@ static int hpage_collapse_scan_pmd(struct mm_struct *mm, for (addr = start_addr, _pte = pte; _pte < pte + HPAGE_PMD_NR; _pte++, addr += PAGE_SIZE) { pte_t pteval = ptep_get(_pte); - if (is_swap_pte(pteval)) { + if (pte_none_or_zero(pteval)) { + ++none_or_zero; + if (!userfaultfd_armed(vma) && + (!cc->is_khugepaged || + none_or_zero <= khugepaged_max_ptes_none)) { + continue; + } else { + result = SCAN_EXCEED_NONE_PTE; + count_vm_event(THP_SCAN_EXCEED_NONE_PTE); + goto out_unmap; + } + } + if (!pte_present(pteval)) { ++unmapped; if (!cc->is_khugepaged || unmapped <= khugepaged_max_ptes_swap) { @@ -1296,18 +1309,6 @@ static int hpage_collapse_scan_pmd(struct mm_struct *mm, goto out_unmap; } } - if (pte_none_or_zero(pteval)) { - ++none_or_zero; - if (!userfaultfd_armed(vma) && - (!cc->is_khugepaged || - none_or_zero <= khugepaged_max_ptes_none)) { - continue; - } else { - result = SCAN_EXCEED_NONE_PTE; - count_vm_event(THP_SCAN_EXCEED_NONE_PTE); - goto out_unmap; - } - } if (pte_uffd_wp(pteval)) { /* * Don't collapse the page if any of the small diff --git a/mm/migrate.c b/mm/migrate.c index ceee354ef215..862b2e261cf9 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -492,7 +492,7 @@ void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, pte = ptep_get(ptep); pte_unmap(ptep); - if (!is_swap_pte(pte)) + if (pte_none(pte) || pte_present(pte)) goto out; entry = pte_to_swp_entry(pte); diff --git a/mm/mprotect.c b/mm/mprotect.c index 0bae241eb7aa..a3e360a8cdec 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -297,7 +297,26 @@ static long change_pte_range(struct mmu_gather *tlb, prot_commit_flush_ptes(vma, addr, pte, oldpte, ptent, nr_ptes, /* idx = */ 0, /* set_write = */ false, tlb); pages += nr_ptes; - } else if (is_swap_pte(oldpte)) { + } else if (pte_none(oldpte)) { + /* + * Nobody plays with any none ptes besides + * userfaultfd when applying the protections. + */ + if (likely(!uffd_wp)) + continue; + + if (userfaultfd_wp_use_markers(vma)) { + /* + * For file-backed mem, we need to be able to + * wr-protect a none pte, because even if the + * pte is none, the page/swap cache could + * exist. Doing that by install a marker. + */ + set_pte_at(vma->vm_mm, addr, pte, + make_pte_marker(PTE_MARKER_UFFD_WP)); + pages++; + } + } else { swp_entry_t entry = pte_to_swp_entry(oldpte); pte_t newpte; @@ -358,28 +377,6 @@ static long change_pte_range(struct mmu_gather *tlb, set_pte_at(vma->vm_mm, addr, pte, newpte); pages++; } - } else { - /* It must be an none page, or what else?.. */ - WARN_ON_ONCE(!pte_none(oldpte)); - - /* - * Nobody plays with any none ptes besides - * userfaultfd when applying the protections. - */ - if (likely(!uffd_wp)) - continue; - - if (userfaultfd_wp_use_markers(vma)) { - /* - * For file-backed mem, we need to be able to - * wr-protect a none pte, because even if the - * pte is none, the page/swap cache could - * exist. Doing that by install a marker. 
- */ - set_pte_at(vma->vm_mm, addr, pte, - make_pte_marker(PTE_MARKER_UFFD_WP)); - pages++; - } } } while (pte += nr_ptes, addr += nr_ptes * PAGE_SIZE, addr != end); arch_leave_lazy_mmu_mode(); diff --git a/mm/mremap.c b/mm/mremap.c index 7c21b2ad13f6..62b6827abacf 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -158,6 +158,9 @@ static void drop_rmap_locks(struct vm_area_struct *vma) static pte_t move_soft_dirty_pte(pte_t pte) { + if (pte_none(pte)) + return pte; + /* * Set soft dirty bit so we can notice * in userspace the ptes were moved. @@ -165,7 +168,7 @@ static pte_t move_soft_dirty_pte(pte_t pte) #ifdef CONFIG_MEM_SOFT_DIRTY if (pte_present(pte)) pte = pte_mksoft_dirty(pte); - else if (is_swap_pte(pte)) + else pte = pte_swp_mksoft_dirty(pte); #endif return pte; @@ -294,7 +297,7 @@ static int move_ptes(struct pagetable_move_control *pmc, if (need_clear_uffd_wp) { if (pte_present(pte)) pte = pte_clear_uffd_wp(pte); - else if (is_swap_pte(pte)) + else pte = pte_swp_clear_uffd_wp(pte); } set_ptes(mm, new_addr, new_ptep, pte, nr_ptes); diff --git a/mm/page_table_check.c b/mm/page_table_check.c index 4eeca782b888..43f75d2f7c36 100644 --- a/mm/page_table_check.c +++ b/mm/page_table_check.c @@ -185,12 +185,15 @@ static inline bool swap_cached_writable(swp_entry_t entry) is_writable_migration_entry(entry); } -static inline void page_table_check_pte_flags(pte_t pte) +static void page_table_check_pte_flags(pte_t pte) { - if (pte_present(pte) && pte_uffd_wp(pte)) - WARN_ON_ONCE(pte_write(pte)); - else if (is_swap_pte(pte) && pte_swp_uffd_wp(pte)) - WARN_ON_ONCE(swap_cached_writable(pte_to_swp_entry(pte))); + if (pte_present(pte)) { + WARN_ON_ONCE(pte_uffd_wp(pte) && pte_write(pte)); + } else if (pte_swp_uffd_wp(pte)) { + const swp_entry_t entry = pte_to_swp_entry(pte); + + WARN_ON_ONCE(swap_cached_writable(entry)); + } } void __page_table_check_ptes_set(struct mm_struct *mm, pte_t *ptep, pte_t pte, diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c index be20468fb5a9..a4e23818f37f 100644 --- a/mm/page_vma_mapped.c +++ b/mm/page_vma_mapped.c @@ -16,6 +16,7 @@ static inline bool not_found(struct page_vma_mapped_walk *pvmw) static bool map_pte(struct page_vma_mapped_walk *pvmw, pmd_t *pmdvalp, spinlock_t **ptlp) { + bool is_migration; pte_t ptent; if (pvmw->flags & PVMW_SYNC) { @@ -26,6 +27,7 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw, pmd_t *pmdvalp, return !!pvmw->pte; } + is_migration = pvmw->flags & PVMW_MIGRATION; again: /* * It is important to return the ptl corresponding to pte, @@ -41,11 +43,14 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw, pmd_t *pmdvalp, ptent = ptep_get(pvmw->pte); - if (pvmw->flags & PVMW_MIGRATION) { - if (!is_swap_pte(ptent)) + if (pte_none(ptent)) { + return false; + } else if (pte_present(ptent)) { + if (is_migration) return false; - } else if (is_swap_pte(ptent)) { + } else if (!is_migration) { swp_entry_t entry; + /* * Handle un-addressable ZONE_DEVICE memory. 
* @@ -66,8 +71,6 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw, pmd_t *pmdvalp, if (!is_device_private_entry(entry) && !is_device_exclusive_entry(entry)) return false; - } else if (!pte_present(ptent)) { - return false; } spin_lock(*ptlp); if (unlikely(!pmd_same(*pmdvalp, pmdp_get_lockless(pvmw->pmd)))) { @@ -113,21 +116,17 @@ static bool check_pte(struct page_vma_mapped_walk *pvmw, unsigned long pte_nr) return false; pfn = softleaf_to_pfn(entry); - } else if (is_swap_pte(ptent)) { - swp_entry_t entry; + } else if (pte_present(ptent)) { + pfn = pte_pfn(ptent); + } else { + const softleaf_t entry = softleaf_from_pte(ptent); /* Handle un-addressable ZONE_DEVICE memory */ - entry = pte_to_swp_entry(ptent); - if (!is_device_private_entry(entry) && - !is_device_exclusive_entry(entry)) - return false; - - pfn = swp_offset_pfn(entry); - } else { - if (!pte_present(ptent)) + if (!softleaf_is_device_private(entry) && + !softleaf_is_device_exclusive(entry)) return false; - pfn = pte_pfn(ptent); + pfn = softleaf_to_pfn(entry); } if ((pfn + pte_nr - 1) < pvmw->pfn) -- 2.51.0 In cases where we can simply utilise the fact that softleaf_from_pte() treats present entries as if they were none entries and thus eliminate spurious uses of is_swap_pte(), do so. No functional change intended. Signed-off-by: Lorenzo Stoakes --- mm/internal.h | 7 +++---- mm/madvise.c | 8 +++----- mm/swap_state.c | 12 ++++++------ mm/swapfile.c | 9 ++++----- 4 files changed, 16 insertions(+), 20 deletions(-) diff --git a/mm/internal.h b/mm/internal.h index 9465129367a4..f0c7461bb02c 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include @@ -380,13 +380,12 @@ static inline int swap_pte_batch(pte_t *start_ptep, int max_nr, pte_t pte) { pte_t expected_pte = pte_next_swp_offset(pte); const pte_t *end_ptep = start_ptep + max_nr; - swp_entry_t entry = pte_to_swp_entry(pte); + const softleaf_t entry = softleaf_from_pte(pte); pte_t *ptep = start_ptep + 1; unsigned short cgroup_id; VM_WARN_ON(max_nr < 1); - VM_WARN_ON(!is_swap_pte(pte)); - VM_WARN_ON(non_swap_entry(entry)); + VM_WARN_ON(!softleaf_is_swap(entry)); cgroup_id = lookup_swap_cgroup_id(entry); while (ptep < end_ptep) { diff --git a/mm/madvise.c b/mm/madvise.c index 2d5ad3cb37bb..58d82495b6c6 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -195,7 +195,7 @@ static int swapin_walk_pmd_entry(pmd_t *pmd, unsigned long start, for (addr = start; addr < end; addr += PAGE_SIZE) { pte_t pte; - swp_entry_t entry; + softleaf_t entry; struct folio *folio; if (!ptep++) { @@ -205,10 +205,8 @@ static int swapin_walk_pmd_entry(pmd_t *pmd, unsigned long start, } pte = ptep_get(ptep); - if (!is_swap_pte(pte)) - continue; - entry = pte_to_swp_entry(pte); - if (unlikely(non_swap_entry(entry))) + entry = softleaf_from_pte(pte); + if (unlikely(!softleaf_is_swap(entry))) continue; pte_unmap_unlock(ptep, ptl); diff --git a/mm/swap_state.c b/mm/swap_state.c index d20d238109f9..8881a79f200c 100644 --- a/mm/swap_state.c +++ b/mm/swap_state.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include @@ -732,7 +732,6 @@ static struct folio *swap_vma_readahead(swp_entry_t targ_entry, gfp_t gfp_mask, pte_t *pte = NULL, pentry; int win; unsigned long start, end, addr; - swp_entry_t entry; pgoff_t ilx; bool page_allocated; @@ -744,16 +743,17 @@ static struct folio *swap_vma_readahead(swp_entry_t targ_entry, gfp_t gfp_mask, blk_start_plug(&plug); for (addr = start; addr < end; ilx++, addr += 
PAGE_SIZE) { + softleaf_t entry; + if (!pte++) { pte = pte_offset_map(vmf->pmd, addr); if (!pte) break; } pentry = ptep_get_lockless(pte); - if (!is_swap_pte(pentry)) - continue; - entry = pte_to_swp_entry(pentry); - if (unlikely(non_swap_entry(entry))) + entry = softleaf_from_pte(pentry); + + if (!softleaf_is_swap(entry)) continue; pte_unmap(pte); pte = NULL; diff --git a/mm/swapfile.c b/mm/swapfile.c index 543f303f101d..684f78cd7dd1 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -44,7 +44,7 @@ #include #include -#include +#include #include #include "swap_table.h" #include "internal.h" @@ -2256,7 +2256,7 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd, struct folio *folio; unsigned long offset; unsigned char swp_count; - swp_entry_t entry; + softleaf_t entry; int ret; pte_t ptent; @@ -2267,11 +2267,10 @@ static int unuse_pte_range(struct vm_area_struct *vma, pmd_t *pmd, } ptent = ptep_get_lockless(pte); + entry = softleaf_from_pte(ptent); - if (!is_swap_pte(ptent)) + if (!softleaf_is_swap(entry)) continue; - - entry = pte_to_swp_entry(ptent); if (swp_type(entry) != type) continue; -- 2.51.0 Remove invocations of is_swap_pte() in mm/debug_vm_pgtable.c and use softleaf_from_pte() and softleaf_is_swap() as necessary to replace this usage. We update the test code to use a 'true' swap entry throughout so we are guaranteed this is not a non-swap entry, so all asserts continue to operate correctly. With this change in place, we no longer use is_swap_pte() anywhere, so remove it. Signed-off-by: Lorenzo Stoakes --- include/linux/swapops.h | 6 ------ mm/debug_vm_pgtable.c | 39 ++++++++++++++++++++++++--------------- 2 files changed, 24 insertions(+), 21 deletions(-) diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 0a4b3f51ecf5..a66ac4f2105c 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -120,12 +120,6 @@ static inline unsigned long swp_offset_pfn(swp_entry_t entry) return swp_offset(entry) & SWP_PFN_MASK; } -/* check whether a pte points to a swap entry */ -static inline int is_swap_pte(pte_t pte) -{ - return !pte_none(pte) && !pte_present(pte); -} - /* * Convert the arch-dependent pte representation of a swp_entry_t into an * arch-independent swp_entry_t. 
diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c index 055e0e025b42..fff311830959 100644 --- a/mm/debug_vm_pgtable.c +++ b/mm/debug_vm_pgtable.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include #include @@ -714,14 +714,16 @@ static void __init pte_soft_dirty_tests(struct pgtable_debug_args *args) static void __init pte_swap_soft_dirty_tests(struct pgtable_debug_args *args) { pte_t pte; + softleaf_t entry; if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY)) return; pr_debug("Validating PTE swap soft dirty\n"); pte = swp_entry_to_pte(args->swp_entry); - WARN_ON(!is_swap_pte(pte)); + entry = softleaf_from_pte(pte); + WARN_ON(!softleaf_is_swap(entry)); WARN_ON(!pte_swp_soft_dirty(pte_swp_mksoft_dirty(pte))); WARN_ON(pte_swp_soft_dirty(pte_swp_clear_soft_dirty(pte))); } @@ -768,40 +770,47 @@ static void __init pmd_swap_soft_dirty_tests(struct pgtable_debug_args *args) { static void __init pte_swap_exclusive_tests(struct pgtable_debug_args *args) { - swp_entry_t entry, entry2; + swp_entry_t entry; + softleaf_t softleaf; pte_t pte; pr_debug("Validating PTE swap exclusive\n"); entry = args->swp_entry; pte = swp_entry_to_pte(entry); + softleaf = softleaf_from_pte(pte); + WARN_ON(pte_swp_exclusive(pte)); - WARN_ON(!is_swap_pte(pte)); - entry2 = pte_to_swp_entry(pte); - WARN_ON(memcmp(&entry, &entry2, sizeof(entry))); + WARN_ON(!softleaf_is_swap(softleaf)); + WARN_ON(memcmp(&entry, &softleaf, sizeof(entry))); pte = pte_swp_mkexclusive(pte); + softleaf = softleaf_from_pte(pte); + WARN_ON(!pte_swp_exclusive(pte)); - WARN_ON(!is_swap_pte(pte)); + WARN_ON(!softleaf_is_swap(softleaf)); WARN_ON(pte_swp_soft_dirty(pte)); - entry2 = pte_to_swp_entry(pte); - WARN_ON(memcmp(&entry, &entry2, sizeof(entry))); + WARN_ON(memcmp(&entry, &softleaf, sizeof(entry))); pte = pte_swp_clear_exclusive(pte); + softleaf = softleaf_from_pte(pte); + WARN_ON(pte_swp_exclusive(pte)); - WARN_ON(!is_swap_pte(pte)); - entry2 = pte_to_swp_entry(pte); - WARN_ON(memcmp(&entry, &entry2, sizeof(entry))); + WARN_ON(!softleaf_is_swap(softleaf)); + WARN_ON(memcmp(&entry, &softleaf, sizeof(entry))); } static void __init pte_swap_tests(struct pgtable_debug_args *args) { swp_entry_t arch_entry; + softleaf_t entry; pte_t pte1, pte2; pr_debug("Validating PTE swap\n"); pte1 = swp_entry_to_pte(args->swp_entry); - WARN_ON(!is_swap_pte(pte1)); + entry = softleaf_from_pte(pte1); + + WARN_ON(!softleaf_is_swap(entry)); arch_entry = __pte_to_swp_entry(pte1); pte2 = __swp_entry_to_pte(arch_entry); @@ -1218,8 +1227,8 @@ static int __init init_args(struct pgtable_debug_args *args) /* See generic_max_swapfile_size(): probe the maximum offset */ max_swap_offset = swp_offset(pte_to_swp_entry(swp_entry_to_pte(swp_entry(0, ~0UL)))); - /* Create a swp entry with all possible bits set */ - args->swp_entry = swp_entry((1 << MAX_SWAPFILES_SHIFT) - 1, max_swap_offset); + /* Create a swp entry with all possible bits set while still being swap. */ + args->swp_entry = swp_entry(MAX_SWAPFILES - 1, max_swap_offset); /* * Allocate (huge) pages because some of the tests need to access -- 2.51.0 Separate out THP logic so we can drop an indentation level and reduce the amount of noise in this function. We add pagemap_pmd_range_thp() for this purpose. While we're here, convert the VM_BUG_ON() to a VM_WARN_ON_ONCE() at the same time. No functional change intended. 
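To illustrate the result, the caller ends up looking roughly like this (a simplified sketch only - the PTE-level walk and the remaining local variables are elided; the real code is in the hunks below):

static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end,
			     struct mm_walk *walk)
{
	struct vm_area_struct *vma = walk->vma;
	struct pagemapread *pm = walk->private;
	spinlock_t *ptl;
	int err = 0;

#ifdef CONFIG_TRANSPARENT_HUGEPAGE
	ptl = pmd_trans_huge_lock(pmdp, vma);
	if (ptl) {
		/* All THP-specific handling now lives in the helper. */
		err = pagemap_pmd_range_thp(pmdp, addr, end, vma, pm);
		spin_unlock(ptl);
		return err;
	}
#endif
	/* ... the per-PTE walk continues here, unchanged ... */
	return err;
}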
Signed-off-by: Lorenzo Stoakes --- fs/proc/task_mmu.c | 146 ++++++++++++++++++++++++--------------------- 1 file changed, 77 insertions(+), 69 deletions(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index ddbf177ecc45..5ca18bd3b2d0 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1984,90 +1984,98 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm, return make_pme(frame, flags); } -static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end, - struct mm_walk *walk) +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static int pagemap_pmd_range_thp(pmd_t *pmdp, unsigned long addr, + unsigned long end, struct vm_area_struct *vma, + struct pagemapread *pm) { - struct vm_area_struct *vma = walk->vma; - struct pagemapread *pm = walk->private; - spinlock_t *ptl; - pte_t *pte, *orig_pte; + unsigned int idx = (addr & ~PMD_MASK) >> PAGE_SHIFT; + u64 flags = 0, frame = 0; + pmd_t pmd = *pmdp; + struct page *page = NULL; + struct folio *folio = NULL; int err = 0; -#ifdef CONFIG_TRANSPARENT_HUGEPAGE - ptl = pmd_trans_huge_lock(pmdp, vma); - if (ptl) { - unsigned int idx = (addr & ~PMD_MASK) >> PAGE_SHIFT; - u64 flags = 0, frame = 0; - pmd_t pmd = *pmdp; - struct page *page = NULL; - struct folio *folio = NULL; + if (vma->vm_flags & VM_SOFTDIRTY) + flags |= PM_SOFT_DIRTY; - if (vma->vm_flags & VM_SOFTDIRTY) - flags |= PM_SOFT_DIRTY; + if (pmd_present(pmd)) { + page = pmd_page(pmd); - if (pmd_present(pmd)) { - page = pmd_page(pmd); + flags |= PM_PRESENT; + if (pmd_soft_dirty(pmd)) + flags |= PM_SOFT_DIRTY; + if (pmd_uffd_wp(pmd)) + flags |= PM_UFFD_WP; + if (pm->show_pfn) + frame = pmd_pfn(pmd) + idx; + } else if (thp_migration_supported() && is_swap_pmd(pmd)) { + swp_entry_t entry = pmd_to_swp_entry(pmd); + unsigned long offset; - flags |= PM_PRESENT; - if (pmd_soft_dirty(pmd)) - flags |= PM_SOFT_DIRTY; - if (pmd_uffd_wp(pmd)) - flags |= PM_UFFD_WP; - if (pm->show_pfn) - frame = pmd_pfn(pmd) + idx; - } -#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION - else if (is_swap_pmd(pmd)) { - swp_entry_t entry = pmd_to_swp_entry(pmd); - unsigned long offset; - - if (pm->show_pfn) { - if (is_pfn_swap_entry(entry)) - offset = swp_offset_pfn(entry) + idx; - else - offset = swp_offset(entry) + idx; - frame = swp_type(entry) | - (offset << MAX_SWAPFILES_SHIFT); - } - flags |= PM_SWAP; - if (pmd_swp_soft_dirty(pmd)) - flags |= PM_SOFT_DIRTY; - if (pmd_swp_uffd_wp(pmd)) - flags |= PM_UFFD_WP; - VM_BUG_ON(!is_pmd_migration_entry(pmd)); - page = pfn_swap_entry_to_page(entry); + if (pm->show_pfn) { + if (is_pfn_swap_entry(entry)) + offset = swp_offset_pfn(entry) + idx; + else + offset = swp_offset(entry) + idx; + frame = swp_type(entry) | + (offset << MAX_SWAPFILES_SHIFT); } -#endif + flags |= PM_SWAP; + if (pmd_swp_soft_dirty(pmd)) + flags |= PM_SOFT_DIRTY; + if (pmd_swp_uffd_wp(pmd)) + flags |= PM_UFFD_WP; + VM_WARN_ON_ONCE(!is_pmd_migration_entry(pmd)); + page = pfn_swap_entry_to_page(entry); + } - if (page) { - folio = page_folio(page); - if (!folio_test_anon(folio)) - flags |= PM_FILE; - } + if (page) { + folio = page_folio(page); + if (!folio_test_anon(folio)) + flags |= PM_FILE; + } - for (; addr != end; addr += PAGE_SIZE, idx++) { - u64 cur_flags = flags; - pagemap_entry_t pme; + for (; addr != end; addr += PAGE_SIZE, idx++) { + u64 cur_flags = flags; + pagemap_entry_t pme; - if (folio && (flags & PM_PRESENT) && - __folio_page_mapped_exclusively(folio, page)) - cur_flags |= PM_MMAP_EXCLUSIVE; + if (folio && (flags & PM_PRESENT) && + __folio_page_mapped_exclusively(folio, 
page)) + cur_flags |= PM_MMAP_EXCLUSIVE; - pme = make_pme(frame, cur_flags); - err = add_to_pagemap(&pme, pm); - if (err) - break; - if (pm->show_pfn) { - if (flags & PM_PRESENT) - frame++; - else if (flags & PM_SWAP) - frame += (1 << MAX_SWAPFILES_SHIFT); - } + pme = make_pme(frame, cur_flags); + err = add_to_pagemap(&pme, pm); + if (err) + break; + if (pm->show_pfn) { + if (flags & PM_PRESENT) + frame++; + else if (flags & PM_SWAP) + frame += (1 << MAX_SWAPFILES_SHIFT); } + } + return err; +} +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + +static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end, + struct mm_walk *walk) +{ + struct vm_area_struct *vma = walk->vma; + struct pagemapread *pm = walk->private; + spinlock_t *ptl; + pte_t *pte, *orig_pte; + int err = 0; + +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + ptl = pmd_trans_huge_lock(pmdp, vma); + if (ptl) { + err = pagemap_pmd_range_thp(pmdp, addr, end, vma, pm); spin_unlock(ptl); return err; } -#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +#endif /* * We can assume that @vma always points to a valid one and @end never -- 2.51.0 PMD 'non-swap' swap entries are currently used for PMD-level migration entries and device private entries. To add to the confusion in this terminology we use is_swap_pmd() in an inconsistent way similar to how is_swap_pte() was being used - sometimes adopting the convention that pmd_none(), !pmd_present() implies PMD 'swap' entry, sometimes not. This patch handles the low-hanging fruit of cases where we can simply substitute other predicates for is_swap_pmd(). No functional change intended. Signed-off-by: Lorenzo Stoakes --- fs/proc/task_mmu.c | 15 ++++++++++--- include/linux/swapops.h | 16 +++++++++++-- mm/huge_memory.c | 4 +++- mm/memory.c | 50 +++++++++++++++++++++++------------------ mm/page_table_check.c | 12 ++++++---- 5 files changed, 65 insertions(+), 32 deletions(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 5ca18bd3b2d0..b68eabb26f29 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1059,10 +1059,12 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, bool present = false; struct folio *folio; + if (pmd_none(*pmd)) + return; if (pmd_present(*pmd)) { page = vm_normal_page_pmd(vma, addr, *pmd); present = true; - } else if (unlikely(thp_migration_supported() && is_swap_pmd(*pmd))) { + } else if (unlikely(thp_migration_supported())) { swp_entry_t entry = pmd_to_swp_entry(*pmd); if (is_pfn_swap_entry(entry)) @@ -1999,6 +2001,9 @@ static int pagemap_pmd_range_thp(pmd_t *pmdp, unsigned long addr, if (vma->vm_flags & VM_SOFTDIRTY) flags |= PM_SOFT_DIRTY; + if (pmd_none(pmd)) + goto populate_pagemap; + if (pmd_present(pmd)) { page = pmd_page(pmd); @@ -2009,7 +2014,7 @@ static int pagemap_pmd_range_thp(pmd_t *pmdp, unsigned long addr, flags |= PM_UFFD_WP; if (pm->show_pfn) frame = pmd_pfn(pmd) + idx; - } else if (thp_migration_supported() && is_swap_pmd(pmd)) { + } else if (thp_migration_supported()) { swp_entry_t entry = pmd_to_swp_entry(pmd); unsigned long offset; @@ -2036,6 +2041,7 @@ static int pagemap_pmd_range_thp(pmd_t *pmdp, unsigned long addr, flags |= PM_FILE; } +populate_pagemap: for (; addr != end; addr += PAGE_SIZE, idx++) { u64 cur_flags = flags; pagemap_entry_t pme; @@ -2398,6 +2404,9 @@ static unsigned long pagemap_thp_category(struct pagemap_scan_private *p, { unsigned long categories = PAGE_IS_HUGE; + if (pmd_none(pmd)) + return categories; + if (pmd_present(pmd)) { struct page *page; @@ -2415,7 +2424,7 @@ static unsigned long 
pagemap_thp_category(struct pagemap_scan_private *p, categories |= PAGE_IS_PFNZERO; if (pmd_soft_dirty(pmd)) categories |= PAGE_IS_SOFT_DIRTY; - } else if (is_swap_pmd(pmd)) { + } else { swp_entry_t swp; categories |= PAGE_IS_SWAPPED; diff --git a/include/linux/swapops.h b/include/linux/swapops.h index a66ac4f2105c..3e8dd6ea94dd 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -509,7 +509,13 @@ static inline pmd_t swp_entry_to_pmd(swp_entry_t entry) static inline int is_pmd_migration_entry(pmd_t pmd) { - return is_swap_pmd(pmd) && is_migration_entry(pmd_to_swp_entry(pmd)); + swp_entry_t entry; + + if (pmd_present(pmd)) + return 0; + + entry = pmd_to_swp_entry(pmd); + return is_migration_entry(entry); } #else /* CONFIG_ARCH_ENABLE_THP_MIGRATION */ static inline int set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw, @@ -557,7 +563,13 @@ static inline int is_pmd_migration_entry(pmd_t pmd) */ static inline int is_pmd_device_private_entry(pmd_t pmd) { - return is_swap_pmd(pmd) && is_device_private_entry(pmd_to_swp_entry(pmd)); + swp_entry_t entry; + + if (pmd_present(pmd)) + return 0; + + entry = pmd_to_swp_entry(pmd); + return is_device_private_entry(entry); } #else /* CONFIG_ZONE_DEVICE && CONFIG_ARCH_ENABLE_THP_MIGRATION */ diff --git a/mm/huge_memory.c b/mm/huge_memory.c index f6c353a8d7bd..2e5196a68f14 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2429,9 +2429,11 @@ static pmd_t move_soft_dirty_pmd(pmd_t pmd) static pmd_t clear_uffd_wp_pmd(pmd_t pmd) { + if (pmd_none(pmd)) + return pmd; if (pmd_present(pmd)) pmd = pmd_clear_uffd_wp(pmd); - else if (is_swap_pmd(pmd)) + else pmd = pmd_swp_clear_uffd_wp(pmd); return pmd; diff --git a/mm/memory.c b/mm/memory.c index 7493ed084b99..fea079e5fb90 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1376,6 +1376,7 @@ copy_pmd_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma, next = pmd_addr_end(addr, end); if (is_swap_pmd(*src_pmd) || pmd_trans_huge(*src_pmd)) { int err; + VM_BUG_ON_VMA(next-addr != HPAGE_PMD_SIZE, src_vma); err = copy_huge_pmd(dst_mm, src_mm, dst_pmd, src_pmd, addr, dst_vma, src_vma); @@ -6350,35 +6351,40 @@ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma, if (pmd_none(*vmf.pmd) && thp_vma_allowable_order(vma, vm_flags, TVA_PAGEFAULT, PMD_ORDER)) { ret = create_huge_pmd(&vmf); - if (!(ret & VM_FAULT_FALLBACK)) + if (ret & VM_FAULT_FALLBACK) + goto fallback; + else return ret; - } else { - vmf.orig_pmd = pmdp_get_lockless(vmf.pmd); + } - if (unlikely(is_swap_pmd(vmf.orig_pmd))) { - if (is_pmd_device_private_entry(vmf.orig_pmd)) - return do_huge_pmd_device_private(&vmf); + vmf.orig_pmd = pmdp_get_lockless(vmf.pmd); + if (pmd_none(vmf.orig_pmd)) + goto fallback; - if (is_pmd_migration_entry(vmf.orig_pmd)) - pmd_migration_entry_wait(mm, vmf.pmd); - return 0; - } - if (pmd_trans_huge(vmf.orig_pmd)) { - if (pmd_protnone(vmf.orig_pmd) && vma_is_accessible(vma)) - return do_huge_pmd_numa_page(&vmf); + if (unlikely(!pmd_present(vmf.orig_pmd))) { + if (is_pmd_device_private_entry(vmf.orig_pmd)) + return do_huge_pmd_device_private(&vmf); - if ((flags & (FAULT_FLAG_WRITE|FAULT_FLAG_UNSHARE)) && - !pmd_write(vmf.orig_pmd)) { - ret = wp_huge_pmd(&vmf); - if (!(ret & VM_FAULT_FALLBACK)) - return ret; - } else { - huge_pmd_set_accessed(&vmf); - return 0; - } + if (is_pmd_migration_entry(vmf.orig_pmd)) + pmd_migration_entry_wait(mm, vmf.pmd); + return 0; + } + if (pmd_trans_huge(vmf.orig_pmd)) { + if (pmd_protnone(vmf.orig_pmd) && vma_is_accessible(vma)) + return 
do_huge_pmd_numa_page(&vmf); + + if ((flags & (FAULT_FLAG_WRITE|FAULT_FLAG_UNSHARE)) && + !pmd_write(vmf.orig_pmd)) { + ret = wp_huge_pmd(&vmf); + if (!(ret & VM_FAULT_FALLBACK)) + return ret; + } else { + huge_pmd_set_accessed(&vmf); + return 0; } } +fallback: return handle_pte_fault(&vmf); } diff --git a/mm/page_table_check.c b/mm/page_table_check.c index 43f75d2f7c36..f5f25e120f69 100644 --- a/mm/page_table_check.c +++ b/mm/page_table_check.c @@ -215,10 +215,14 @@ EXPORT_SYMBOL(__page_table_check_ptes_set); static inline void page_table_check_pmd_flags(pmd_t pmd) { - if (pmd_present(pmd) && pmd_uffd_wp(pmd)) - WARN_ON_ONCE(pmd_write(pmd)); - else if (is_swap_pmd(pmd) && pmd_swp_uffd_wp(pmd)) - WARN_ON_ONCE(swap_cached_writable(pmd_to_swp_entry(pmd))); + if (pmd_present(pmd)) { + if (pmd_uffd_wp(pmd)) + WARN_ON_ONCE(pmd_write(pmd)); + } else if (pmd_swp_uffd_wp(pmd)) { + swp_entry_t entry = pmd_to_swp_entry(pmd); + + WARN_ON_ONCE(swap_cached_writable(entry)); + } } void __page_table_check_pmds_set(struct mm_struct *mm, pmd_t *pmdp, pmd_t pmd, -- 2.51.0 Right now we are inconsistent in our use of thp_migration_supported(): static inline bool thp_migration_supported(void) { return IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION); } And simply having arbitrary and ugly #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION blocks in code. This is exhibited in copy_huge_pmd(), which inserts a large #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION block and an if-branch which is difficult to follow It's difficult to follow the logic of such a large function and the non-present PMD logic is clearly separate as it sits in a giant if-branch. Therefore this patch both separates out the logic and utilises thp_migration_supported(). No functional change intended. Signed-off-by: Lorenzo Stoakes --- mm/huge_memory.c | 109 +++++++++++++++++++++++++---------------------- 1 file changed, 59 insertions(+), 50 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 2e5196a68f14..31116d69e289 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1774,6 +1774,62 @@ void touch_pmd(struct vm_area_struct *vma, unsigned long addr, update_mmu_cache_pmd(vma, addr, pmd); } +static void copy_huge_non_present_pmd( + struct mm_struct *dst_mm, struct mm_struct *src_mm, + pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr, + struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma, + pmd_t pmd, pgtable_t pgtable) +{ + swp_entry_t entry = pmd_to_swp_entry(pmd); + struct folio *src_folio; + + VM_WARN_ON(!is_pmd_non_present_folio_entry(pmd)); + + if (is_writable_migration_entry(entry) || + is_readable_exclusive_migration_entry(entry)) { + entry = make_readable_migration_entry(swp_offset(entry)); + pmd = swp_entry_to_pmd(entry); + if (pmd_swp_soft_dirty(*src_pmd)) + pmd = pmd_swp_mksoft_dirty(pmd); + if (pmd_swp_uffd_wp(*src_pmd)) + pmd = pmd_swp_mkuffd_wp(pmd); + set_pmd_at(src_mm, addr, src_pmd, pmd); + } else if (is_device_private_entry(entry)) { + /* + * For device private entries, since there are no + * read exclusive entries, writable = !readable + */ + if (is_writable_device_private_entry(entry)) { + entry = make_readable_device_private_entry(swp_offset(entry)); + pmd = swp_entry_to_pmd(entry); + + if (pmd_swp_soft_dirty(*src_pmd)) + pmd = pmd_swp_mksoft_dirty(pmd); + if (pmd_swp_uffd_wp(*src_pmd)) + pmd = pmd_swp_mkuffd_wp(pmd); + set_pmd_at(src_mm, addr, src_pmd, pmd); + } + + src_folio = pfn_swap_entry_folio(entry); + VM_WARN_ON(!folio_test_large(src_folio)); + + folio_get(src_folio); + /* + * 
folio_try_dup_anon_rmap_pmd does not fail for + * device private entries. + */ + folio_try_dup_anon_rmap_pmd(src_folio, &src_folio->page, + dst_vma, src_vma); + } + + add_mm_counter(dst_mm, MM_ANONPAGES, HPAGE_PMD_NR); + mm_inc_nr_ptes(dst_mm); + pgtable_trans_huge_deposit(dst_mm, dst_pmd, pgtable); + if (!userfaultfd_wp(dst_vma)) + pmd = pmd_swp_clear_uffd_wp(pmd); + set_pmd_at(dst_mm, addr, dst_pmd, pmd); +} + int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr, struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma) @@ -1819,59 +1875,12 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, ret = -EAGAIN; pmd = *src_pmd; -#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION - if (unlikely(is_swap_pmd(pmd))) { - swp_entry_t entry = pmd_to_swp_entry(pmd); - - VM_WARN_ON(!is_pmd_non_present_folio_entry(pmd)); - - if (is_writable_migration_entry(entry) || - is_readable_exclusive_migration_entry(entry)) { - entry = make_readable_migration_entry(swp_offset(entry)); - pmd = swp_entry_to_pmd(entry); - if (pmd_swp_soft_dirty(*src_pmd)) - pmd = pmd_swp_mksoft_dirty(pmd); - if (pmd_swp_uffd_wp(*src_pmd)) - pmd = pmd_swp_mkuffd_wp(pmd); - set_pmd_at(src_mm, addr, src_pmd, pmd); - } else if (is_device_private_entry(entry)) { - /* - * For device private entries, since there are no - * read exclusive entries, writable = !readable - */ - if (is_writable_device_private_entry(entry)) { - entry = make_readable_device_private_entry(swp_offset(entry)); - pmd = swp_entry_to_pmd(entry); - - if (pmd_swp_soft_dirty(*src_pmd)) - pmd = pmd_swp_mksoft_dirty(pmd); - if (pmd_swp_uffd_wp(*src_pmd)) - pmd = pmd_swp_mkuffd_wp(pmd); - set_pmd_at(src_mm, addr, src_pmd, pmd); - } - - src_folio = pfn_swap_entry_folio(entry); - VM_WARN_ON(!folio_test_large(src_folio)); - - folio_get(src_folio); - /* - * folio_try_dup_anon_rmap_pmd does not fail for - * device private entries. - */ - folio_try_dup_anon_rmap_pmd(src_folio, &src_folio->page, - dst_vma, src_vma); - } - - add_mm_counter(dst_mm, MM_ANONPAGES, HPAGE_PMD_NR); - mm_inc_nr_ptes(dst_mm); - pgtable_trans_huge_deposit(dst_mm, dst_pmd, pgtable); - if (!userfaultfd_wp(dst_vma)) - pmd = pmd_swp_clear_uffd_wp(pmd); - set_pmd_at(dst_mm, addr, dst_pmd, pmd); + if (unlikely(thp_migration_supported() && is_swap_pmd(pmd))) { + copy_huge_non_present_pmd(dst_mm, src_mm, dst_pmd, src_pmd, addr, + dst_vma, src_vma, pmd, pgtable); ret = 0; goto out_unlock; } -#endif if (unlikely(!pmd_trans_huge(pmd))) { pte_free(dst_mm, pgtable); -- 2.51.0 Similar to copy_huge_pmd(), there is a large mass of open-coded logic for the CONFIG_ARCH_ENABLE_THP_MIGRATION non-present entry case that does not use thp_migration_supported() consistently. Resolve this by separating out this logic and introduce change_non_present_huge_pmd(). No functional change intended. 
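With this in place, the call site in change_huge_pmd() reduces to roughly the following (sketch only - the PMD lock handling and the prot_numa path around it are elided; see the hunk below for the real code):

	if (thp_migration_supported() && is_swap_pmd(*pmd)) {
		/*
		 * Non-present (migration / device private) entries are handled
		 * entirely by the helper. thp_migration_supported() is a
		 * compile-time constant, so the branch is optimised away when
		 * THP migration is not supported and no #ifdef is required.
		 */
		change_non_present_huge_pmd(mm, addr, pmd, uffd_wp,
					    uffd_wp_resolve);
		goto unlock;
	}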
Signed-off-by: Lorenzo Stoakes --- mm/huge_memory.c | 72 ++++++++++++++++++++++++++---------------------- 1 file changed, 39 insertions(+), 33 deletions(-) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 31116d69e289..40a8a2c1e080 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2499,6 +2499,42 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, return false; } +static void change_non_present_huge_pmd(struct mm_struct *mm, + unsigned long addr, pmd_t *pmd, bool uffd_wp, + bool uffd_wp_resolve) +{ + swp_entry_t entry = pmd_to_swp_entry(*pmd); + struct folio *folio = pfn_swap_entry_folio(entry); + pmd_t newpmd; + + VM_WARN_ON(!is_pmd_non_present_folio_entry(*pmd)); + if (is_writable_migration_entry(entry)) { + /* + * A protection check is difficult so + * just be safe and disable write + */ + if (folio_test_anon(folio)) + entry = make_readable_exclusive_migration_entry(swp_offset(entry)); + else + entry = make_readable_migration_entry(swp_offset(entry)); + newpmd = swp_entry_to_pmd(entry); + if (pmd_swp_soft_dirty(*pmd)) + newpmd = pmd_swp_mksoft_dirty(newpmd); + } else if (is_writable_device_private_entry(entry)) { + entry = make_readable_device_private_entry(swp_offset(entry)); + newpmd = swp_entry_to_pmd(entry); + } else { + newpmd = *pmd; + } + + if (uffd_wp) + newpmd = pmd_swp_mkuffd_wp(newpmd); + else if (uffd_wp_resolve) + newpmd = pmd_swp_clear_uffd_wp(newpmd); + if (!pmd_same(*pmd, newpmd)) + set_pmd_at(mm, addr, pmd, newpmd); +} + /* * Returns * - 0 if PMD could not be locked @@ -2527,41 +2563,11 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, if (!ptl) return 0; -#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION - if (is_swap_pmd(*pmd)) { - swp_entry_t entry = pmd_to_swp_entry(*pmd); - struct folio *folio = pfn_swap_entry_folio(entry); - pmd_t newpmd; - - VM_WARN_ON(!is_pmd_non_present_folio_entry(*pmd)); - if (is_writable_migration_entry(entry)) { - /* - * A protection check is difficult so - * just be safe and disable write - */ - if (folio_test_anon(folio)) - entry = make_readable_exclusive_migration_entry(swp_offset(entry)); - else - entry = make_readable_migration_entry(swp_offset(entry)); - newpmd = swp_entry_to_pmd(entry); - if (pmd_swp_soft_dirty(*pmd)) - newpmd = pmd_swp_mksoft_dirty(newpmd); - } else if (is_writable_device_private_entry(entry)) { - entry = make_readable_device_private_entry(swp_offset(entry)); - newpmd = swp_entry_to_pmd(entry); - } else { - newpmd = *pmd; - } - - if (uffd_wp) - newpmd = pmd_swp_mkuffd_wp(newpmd); - else if (uffd_wp_resolve) - newpmd = pmd_swp_clear_uffd_wp(newpmd); - if (!pmd_same(*pmd, newpmd)) - set_pmd_at(mm, addr, pmd, newpmd); + if (thp_migration_supported() && is_swap_pmd(*pmd)) { + change_non_present_huge_pmd(mm, addr, pmd, uffd_wp, + uffd_wp_resolve); goto unlock; } -#endif if (prot_numa) { -- 2.51.0 Introduce softleaf_from_pmd() to do the equivalent operation for PMDs that softleaf_from_pte() fulfils, and cascade changes through code base accordingly, introducing helpers as necessary. We are then able to eliminate pmd_to_swp_entry(), is_pmd_migration_entry(), is_pmd_device_private_entry() and is_pmd_non_present_folio_entry(). This further establishes the use of leaf operations throughout the code base and further establishes the foundations for eliminating is_swap_pmd(). No functional change intended. 
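To illustrate the intended calling convention, a typical conversion looks roughly like the below (a sketch only - do_something_with() is a hypothetical stand-in for whatever the caller actually does with the entry):

	/* Before: open-coded check against the raw swap entry. */
	if (!pmd_present(pmd) && is_migration_entry(pmd_to_swp_entry(pmd)))
		do_something_with(pmd);

	/* After: decode once; a present PMD simply yields an empty entry. */
	const softleaf_t entry = softleaf_from_pmd(pmd);

	if (softleaf_is_migration(entry))
		do_something_with(pmd);

Callers that also need the PFN or folio can then use softleaf_to_pfn() / softleaf_to_folio() on the same decoded entry rather than converting the PMD repeatedly.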
Signed-off-by: Lorenzo Stoakes --- fs/proc/task_mmu.c | 27 +++-- include/linux/leafops.h | 220 ++++++++++++++++++++++++++++++++++++++++ include/linux/migrate.h | 2 +- include/linux/swapops.h | 100 ------------------ mm/damon/ops-common.c | 6 +- mm/filemap.c | 6 +- mm/hmm.c | 16 +-- mm/huge_memory.c | 98 +++++++++--------- mm/khugepaged.c | 4 +- mm/madvise.c | 2 +- mm/memory.c | 4 +- mm/mempolicy.c | 4 +- mm/migrate.c | 20 ++-- mm/migrate_device.c | 14 +-- mm/page_table_check.c | 16 +-- mm/page_vma_mapped.c | 15 +-- mm/pagewalk.c | 8 +- mm/rmap.c | 4 +- 18 files changed, 343 insertions(+), 223 deletions(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index b68eabb26f29..d982fdfcf057 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1065,10 +1065,10 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, page = vm_normal_page_pmd(vma, addr, *pmd); present = true; } else if (unlikely(thp_migration_supported())) { - swp_entry_t entry = pmd_to_swp_entry(*pmd); + const softleaf_t entry = softleaf_from_pmd(*pmd); - if (is_pfn_swap_entry(entry)) - page = pfn_swap_entry_to_page(entry); + if (softleaf_has_pfn(entry)) + page = softleaf_to_page(entry); } if (IS_ERR_OR_NULL(page)) return; @@ -1654,7 +1654,7 @@ static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma, pmd = pmd_clear_soft_dirty(pmd); set_pmd_at(vma->vm_mm, addr, pmdp, pmd); - } else if (is_migration_entry(pmd_to_swp_entry(pmd))) { + } else if (pmd_is_migration_entry(pmd)) { pmd = pmd_swp_clear_soft_dirty(pmd); set_pmd_at(vma->vm_mm, addr, pmdp, pmd); } @@ -2015,12 +2015,12 @@ static int pagemap_pmd_range_thp(pmd_t *pmdp, unsigned long addr, if (pm->show_pfn) frame = pmd_pfn(pmd) + idx; } else if (thp_migration_supported()) { - swp_entry_t entry = pmd_to_swp_entry(pmd); + const softleaf_t entry = softleaf_from_pmd(pmd); unsigned long offset; if (pm->show_pfn) { - if (is_pfn_swap_entry(entry)) - offset = swp_offset_pfn(entry) + idx; + if (softleaf_has_pfn(entry)) + offset = softleaf_to_pfn(entry) + idx; else offset = swp_offset(entry) + idx; frame = swp_type(entry) | @@ -2031,7 +2031,7 @@ static int pagemap_pmd_range_thp(pmd_t *pmdp, unsigned long addr, flags |= PM_SOFT_DIRTY; if (pmd_swp_uffd_wp(pmd)) flags |= PM_UFFD_WP; - VM_WARN_ON_ONCE(!is_pmd_migration_entry(pmd)); + VM_WARN_ON_ONCE(!pmd_is_migration_entry(pmd)); page = pfn_swap_entry_to_page(entry); } @@ -2425,8 +2425,6 @@ static unsigned long pagemap_thp_category(struct pagemap_scan_private *p, if (pmd_soft_dirty(pmd)) categories |= PAGE_IS_SOFT_DIRTY; } else { - swp_entry_t swp; - categories |= PAGE_IS_SWAPPED; if (!pmd_swp_uffd_wp(pmd)) categories |= PAGE_IS_WRITTEN; @@ -2434,9 +2432,10 @@ static unsigned long pagemap_thp_category(struct pagemap_scan_private *p, categories |= PAGE_IS_SOFT_DIRTY; if (p->masks_of_interest & PAGE_IS_FILE) { - swp = pmd_to_swp_entry(pmd); - if (is_pfn_swap_entry(swp) && - !folio_test_anon(pfn_swap_entry_folio(swp))) + const softleaf_t entry = softleaf_from_pmd(pmd); + + if (softleaf_has_pfn(entry) && + !folio_test_anon(softleaf_to_folio(entry))) categories |= PAGE_IS_FILE; } } @@ -2453,7 +2452,7 @@ static void make_uffd_wp_pmd(struct vm_area_struct *vma, old = pmdp_invalidate_ad(vma, addr, pmdp); pmd = pmd_mkuffd_wp(old); set_pmd_at(vma->vm_mm, addr, pmdp, pmd); - } else if (is_migration_entry(pmd_to_swp_entry(pmd))) { + } else if (pmd_is_migration_entry(pmd)) { pmd = pmd_swp_mkuffd_wp(pmd); set_pmd_at(vma->vm_mm, addr, pmdp, pmd); } diff --git a/include/linux/leafops.h b/include/linux/leafops.h index 
1376589d94b0..9be9a4e8ada4 100644 --- a/include/linux/leafops.h +++ b/include/linux/leafops.h @@ -61,6 +61,57 @@ static inline softleaf_t softleaf_from_pte(pte_t pte) return pte_to_swp_entry(pte); } +/** + * softleaf_to_pte() - Obtain a PTE entry from a leaf entry. + * @entry: Leaf entry. + * + * This generates an architecture-specific PTE entry that can be utilised to + * encode the metadata the leaf entry encodes. + * + * Returns: Architecture-specific PTE entry encoding leaf entry. + */ +static inline pte_t softleaf_to_pte(softleaf_t entry) +{ + /* Temporary until swp_entry_t eliminated. */ + return swp_entry_to_pte(entry); +} + +#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION +/** + * softleaf_from_pmd() - Obtain a leaf entry from a PMD entry. + * @pmd: PMD entry. + * + * If @pmd is present (therefore not a leaf entry) the function returns an empty + * leaf entry. Otherwise, it returns a leaf entry. + * + * Returns: Leaf entry. + */ +static inline softleaf_t softleaf_from_pmd(pmd_t pmd) +{ + softleaf_t arch_entry; + + if (pmd_present(pmd)) + return softleaf_mk_none(); + + if (pmd_swp_soft_dirty(pmd)) + pmd = pmd_swp_clear_soft_dirty(pmd); + if (pmd_swp_uffd_wp(pmd)) + pmd = pmd_swp_clear_uffd_wp(pmd); + arch_entry = __pmd_to_swp_entry(pmd); + + /* Temporary until swp_entry_t eliminated. */ + return swp_entry(__swp_type(arch_entry), __swp_offset(arch_entry)); +} + +#else + +static inline softleaf_t softleaf_from_pmd(pmd_t pmd) +{ + return softleaf_mk_none(); +} + +#endif + /** * softleaf_is_none() - Is the leaf entry empty? * @entry: Leaf entry. @@ -134,6 +185,43 @@ static inline bool softleaf_is_swap(softleaf_t entry) return softleaf_type(entry) == SOFTLEAF_SWAP; } +/** + * softleaf_is_migration_write() - Is this leaf entry a writable migration entry? + * @entry: Leaf entry. + * + * Returns: true if the leaf entry is a writable migration entry, otherwise + * false. + */ +static inline bool softleaf_is_migration_write(softleaf_t entry) +{ + return softleaf_type(entry) == SOFTLEAF_MIGRATION_WRITE; +} + +/** + * softleaf_is_migration_read() - Is this leaf entry a readable migration entry? + * @entry: Leaf entry. + * + * Returns: true if the leaf entry is a readable migration entry, otherwise + * false. + */ +static inline bool softleaf_is_migration_read(softleaf_t entry) +{ + return softleaf_type(entry) == SOFTLEAF_MIGRATION_READ; +} + +/** + * softleaf_is_migration_read_exclusive() - Is this leaf entry an exclusive + * readable migration entry? + * @entry: Leaf entry. + * + * Returns: true if the leaf entry is an exclusive readable migration entry, + * otherwise false. + */ +static inline bool softleaf_is_migration_read_exclusive(softleaf_t entry) +{ + return softleaf_type(entry) == SOFTLEAF_MIGRATION_READ_EXCLUSIVE; +} + /** * softleaf_is_swap() - Is this leaf entry a migration entry? * @entry: Leaf entry. @@ -152,6 +240,19 @@ static inline bool softleaf_is_migration(softleaf_t entry) } } +/** + * softleaf_is_device_private_write() - Is this leaf entry a device private + * writable entry? + * @entry: Leaf entry. + * + * Returns: true if the leaf entry is a device private writable entry, otherwise + * false. + */ +static inline bool softleaf_is_device_private_write(softleaf_t entry) +{ + return softleaf_type(entry) == SOFTLEAF_DEVICE_PRIVATE_WRITE; +} + /** * softleaf_is_device_private() - Is this leaf entry a device private entry? * @entry: Leaf entry. 
@@ -169,6 +270,12 @@ static inline bool softleaf_is_device_private(softleaf_t entry) } } +/** + * softleaf_is_device_exclusive() - Is this leaf entry a device-exclusive entry? + * @entry: Leaf entry. + * + * Returns: true if the leaf entry is a device-exclusive entry, otherwise false. + */ static inline bool softleaf_is_device_exclusive(softleaf_t entry) { return softleaf_type(entry) == SOFTLEAF_DEVICE_EXCLUSIVE; @@ -327,6 +434,61 @@ static inline bool softleaf_is_uffd_wp_marker(softleaf_t entry) return softleaf_to_marker(entry) & PTE_MARKER_UFFD_WP; } +#ifdef CONFIG_MIGRATION + +/** + * softleaf_is_migration_young() - Does this migration entry contain an accessed + * bit? + * @entry: Leaf entry. + * + * If the architecture can support storing A/D bits in migration entries, this + * determines whether the accessed (or 'young') bit was set on the migrated page + * table entry. + * + * Returns: true if the entry contains an accessed bit, otherwise false. + */ +static inline bool softleaf_is_migration_young(softleaf_t entry) +{ + VM_WARN_ON_ONCE(!softleaf_is_migration(entry)); + + if (migration_entry_supports_ad()) + return swp_offset(entry) & SWP_MIG_YOUNG; + /* Keep the old behavior of aging page after migration */ + return false; +} + +/** + * softleaf_is_migration_dirty() - Does this migration entry contain a dirty bit? + * @entry: Leaf entry. + * + * If the architecture can support storing A/D bits in migration entries, this + * determines whether the dirty bit was set on the migrated page table entry. + * + * Returns: true if the entry contains a dirty bit, otherwise false. + */ +static inline bool softleaf_is_migration_dirty(softleaf_t entry) +{ + VM_WARN_ON_ONCE(!softleaf_is_migration(entry)); + + if (migration_entry_supports_ad()) + return swp_offset(entry) & SWP_MIG_DIRTY; + /* Keep the old behavior of clean page after migration */ + return false; +} + +#else /* CONFIG_MIGRATION */ + +static inline bool softleaf_is_migration_young(softleaf_t entry) +{ + return false; +} + +static inline bool softleaf_is_migration_dirty(softleaf_t entry) +{ + return false; +} +#endif /* CONFIG_MIGRATION */ + /** * pte_is_marker() - Does the PTE entry encode a marker leaf entry? * @pte: PTE entry. @@ -378,5 +540,63 @@ static inline bool pte_is_uffd_marker(pte_t pte) return false; } +#if defined(CONFIG_ZONE_DEVICE) && defined(CONFIG_ARCH_ENABLE_THP_MIGRATION) + +/** + * pmd_is_device_private_entry() - Check if PMD contains a device private swap + * entry. + * @pmd: The PMD to check. + * + * Returns true if the PMD contains a swap entry that represents a device private + * page mapping. This is used for zone device private pages that have been + * swapped out but still need special handling during various memory management + * operations. + * + * Return: true if PMD contains device private entry, false otherwise + */ +static inline bool pmd_is_device_private_entry(pmd_t pmd) +{ + return softleaf_is_device_private(softleaf_from_pmd(pmd)); +} + +#else /* CONFIG_ZONE_DEVICE && CONFIG_ARCH_ENABLE_THP_MIGRATION */ + +static inline bool pmd_is_device_private_entry(pmd_t pmd) +{ + return false; +} + +#endif /* CONFIG_ZONE_DEVICE && CONFIG_ARCH_ENABLE_THP_MIGRATION */ + +/** + * pmd_is_migration_entry() - Does this PMD entry encode a migration entry? + * @pmd: PMD entry. + * + * Returns: true if the PMD encodes a migration entry, otherwise false. 
+ */ +static inline bool pmd_is_migration_entry(pmd_t pmd) +{ + return softleaf_is_migration(softleaf_from_pmd(pmd)); +} + +/** + * pmd_is_valid_softleaf() - Is this PMD entry a valid leaf entry? + * @pmd: PMD entry. + * + * PMD leaf entries are valid only if they are device private or migration + * entries. This function asserts that a PMD leaf entry is valid in this + * respect. + * + * Returns: true if the PMD entry is a valid leaf entry, otherwise false. + */ +static inline bool pmd_is_valid_softleaf(pmd_t pmd) +{ + const softleaf_t entry = softleaf_from_pmd(pmd); + + /* Only device private, migration entries valid for PMD. */ + return softleaf_is_device_private(entry) || + softleaf_is_migration(entry); +} + #endif /* CONFIG_MMU */ #endif /* _LINUX_SWAPOPS_H */ diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 41b4cc05a450..26ca00c325d9 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -65,7 +65,7 @@ bool isolate_folio_to_list(struct folio *folio, struct list_head *list); int migrate_huge_page_move_mapping(struct address_space *mapping, struct folio *dst, struct folio *src); -void migration_entry_wait_on_locked(swp_entry_t entry, spinlock_t *ptl) +void migration_entry_wait_on_locked(softleaf_t entry, spinlock_t *ptl) __releases(ptl); void folio_migrate_flags(struct folio *newfolio, struct folio *folio); int folio_migrate_mapping(struct address_space *mapping, diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 3e8dd6ea94dd..f1277647262d 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -283,14 +283,6 @@ static inline swp_entry_t make_migration_entry_young(swp_entry_t entry) return entry; } -static inline bool is_migration_entry_young(swp_entry_t entry) -{ - if (migration_entry_supports_ad()) - return swp_offset(entry) & SWP_MIG_YOUNG; - /* Keep the old behavior of aging page after migration */ - return false; -} - static inline swp_entry_t make_migration_entry_dirty(swp_entry_t entry) { if (migration_entry_supports_ad()) @@ -299,14 +291,6 @@ static inline swp_entry_t make_migration_entry_dirty(swp_entry_t entry) return entry; } -static inline bool is_migration_entry_dirty(swp_entry_t entry) -{ - if (migration_entry_supports_ad()) - return swp_offset(entry) & SWP_MIG_DIRTY; - /* Keep the old behavior of clean page after migration */ - return false; -} - extern void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, unsigned long address); extern void migration_entry_wait_huge(struct vm_area_struct *vma, unsigned long addr, pte_t *pte); @@ -349,20 +333,11 @@ static inline swp_entry_t make_migration_entry_young(swp_entry_t entry) return entry; } -static inline bool is_migration_entry_young(swp_entry_t entry) -{ - return false; -} - static inline swp_entry_t make_migration_entry_dirty(swp_entry_t entry) { return entry; } -static inline bool is_migration_entry_dirty(swp_entry_t entry) -{ - return false; -} #endif /* CONFIG_MIGRATION */ #ifdef CONFIG_MEMORY_FAILURE @@ -487,18 +462,6 @@ extern void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, extern void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd); -static inline swp_entry_t pmd_to_swp_entry(pmd_t pmd) -{ - swp_entry_t arch_entry; - - if (pmd_swp_soft_dirty(pmd)) - pmd = pmd_swp_clear_soft_dirty(pmd); - if (pmd_swp_uffd_wp(pmd)) - pmd = pmd_swp_clear_uffd_wp(pmd); - arch_entry = __pmd_to_swp_entry(pmd); - return swp_entry(__swp_type(arch_entry), __swp_offset(arch_entry)); -} - static inline pmd_t swp_entry_to_pmd(swp_entry_t entry) { 
swp_entry_t arch_entry; @@ -507,23 +470,7 @@ static inline pmd_t swp_entry_to_pmd(swp_entry_t entry) return __swp_entry_to_pmd(arch_entry); } -static inline int is_pmd_migration_entry(pmd_t pmd) -{ - swp_entry_t entry; - - if (pmd_present(pmd)) - return 0; - - entry = pmd_to_swp_entry(pmd); - return is_migration_entry(entry); -} #else /* CONFIG_ARCH_ENABLE_THP_MIGRATION */ -static inline int set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw, - struct page *page) -{ - BUILD_BUG(); -} - static inline void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new) { @@ -532,64 +479,17 @@ static inline void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, static inline void pmd_migration_entry_wait(struct mm_struct *m, pmd_t *p) { } -static inline swp_entry_t pmd_to_swp_entry(pmd_t pmd) -{ - return swp_entry(0, 0); -} - static inline pmd_t swp_entry_to_pmd(swp_entry_t entry) { return __pmd(0); } -static inline int is_pmd_migration_entry(pmd_t pmd) -{ - return 0; -} #endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */ -#if defined(CONFIG_ZONE_DEVICE) && defined(CONFIG_ARCH_ENABLE_THP_MIGRATION) - -/** - * is_pmd_device_private_entry() - Check if PMD contains a device private swap entry - * @pmd: The PMD to check - * - * Returns true if the PMD contains a swap entry that represents a device private - * page mapping. This is used for zone device private pages that have been - * swapped out but still need special handling during various memory management - * operations. - * - * Return: 1 if PMD contains device private entry, 0 otherwise - */ -static inline int is_pmd_device_private_entry(pmd_t pmd) -{ - swp_entry_t entry; - - if (pmd_present(pmd)) - return 0; - - entry = pmd_to_swp_entry(pmd); - return is_device_private_entry(entry); -} - -#else /* CONFIG_ZONE_DEVICE && CONFIG_ARCH_ENABLE_THP_MIGRATION */ - -static inline int is_pmd_device_private_entry(pmd_t pmd) -{ - return 0; -} - -#endif /* CONFIG_ZONE_DEVICE && CONFIG_ARCH_ENABLE_THP_MIGRATION */ - static inline int non_swap_entry(swp_entry_t entry) { return swp_type(entry) >= MAX_SWAPFILES; } -static inline int is_pmd_non_present_folio_entry(pmd_t pmd) -{ - return is_pmd_migration_entry(pmd) || is_pmd_device_private_entry(pmd); -} - #endif /* CONFIG_MMU */ #endif /* _LINUX_SWAPOPS_H */ diff --git a/mm/damon/ops-common.c b/mm/damon/ops-common.c index 971df8a16ba4..a218d9922234 100644 --- a/mm/damon/ops-common.c +++ b/mm/damon/ops-common.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include "../internal.h" #include "ops-common.h" @@ -51,7 +51,7 @@ void damon_ptep_mkold(pte_t *pte, struct vm_area_struct *vma, unsigned long addr if (likely(pte_present(pteval))) pfn = pte_pfn(pteval); else - pfn = swp_offset_pfn(pte_to_swp_entry(pteval)); + pfn = softleaf_to_pfn(softleaf_from_pte(pteval)); folio = damon_get_folio(pfn); if (!folio) @@ -83,7 +83,7 @@ void damon_pmdp_mkold(pmd_t *pmd, struct vm_area_struct *vma, unsigned long addr if (likely(pmd_present(pmdval))) pfn = pmd_pfn(pmdval); else - pfn = swp_offset_pfn(pmd_to_swp_entry(pmdval)); + pfn = softleaf_to_pfn(softleaf_from_pmd(pmdval)); folio = damon_get_folio(pfn); if (!folio) diff --git a/mm/filemap.c b/mm/filemap.c index ff75bd89b68c..950d93885e38 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include @@ -1402,7 +1402,7 @@ static inline int folio_wait_bit_common(struct folio *folio, int bit_nr, * This follows the same logic as folio_wait_bit_common() so see the 
comments * there. */ -void migration_entry_wait_on_locked(swp_entry_t entry, spinlock_t *ptl) +void migration_entry_wait_on_locked(softleaf_t entry, spinlock_t *ptl) __releases(ptl) { struct wait_page_queue wait_page; @@ -1411,7 +1411,7 @@ void migration_entry_wait_on_locked(swp_entry_t entry, spinlock_t *ptl) unsigned long pflags; bool in_thrashing; wait_queue_head_t *q; - struct folio *folio = pfn_swap_entry_folio(entry); + struct folio *folio = softleaf_to_folio(entry); q = folio_waitqueue(folio); if (!folio_test_uptodate(folio) && folio_test_workingset(folio)) { diff --git a/mm/hmm.c b/mm/hmm.c index b11b4ebba945..bc3fa699a4c6 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include @@ -334,19 +334,19 @@ static int hmm_vma_handle_absent_pmd(struct mm_walk *walk, unsigned long start, struct hmm_vma_walk *hmm_vma_walk = walk->private; struct hmm_range *range = hmm_vma_walk->range; unsigned long npages = (end - start) >> PAGE_SHIFT; + const softleaf_t entry = softleaf_from_pmd(pmd); unsigned long addr = start; - swp_entry_t entry = pmd_to_swp_entry(pmd); unsigned int required_fault; - if (is_device_private_entry(entry) && - pfn_swap_entry_folio(entry)->pgmap->owner == + if (softleaf_is_device_private(entry) && + softleaf_to_folio(entry)->pgmap->owner == range->dev_private_owner) { unsigned long cpu_flags = HMM_PFN_VALID | hmm_pfn_flags_order(PMD_SHIFT - PAGE_SHIFT); - unsigned long pfn = swp_offset_pfn(entry); + unsigned long pfn = softleaf_to_pfn(entry); unsigned long i; - if (is_writable_device_private_entry(entry)) + if (softleaf_is_device_private_write(entry)) cpu_flags |= HMM_PFN_WRITE; /* @@ -365,7 +365,7 @@ static int hmm_vma_handle_absent_pmd(struct mm_walk *walk, unsigned long start, required_fault = hmm_range_need_fault(hmm_vma_walk, hmm_pfns, npages, 0); if (required_fault) { - if (is_device_private_entry(entry)) + if (softleaf_is_device_private(entry)) return hmm_vma_fault(addr, end, required_fault, walk); else return -EFAULT; @@ -407,7 +407,7 @@ static int hmm_vma_walk_pmd(pmd_t *pmdp, if (pmd_none(pmd)) return hmm_vma_walk_hole(start, end, -1, walk); - if (thp_migration_supported() && is_pmd_migration_entry(pmd)) { + if (thp_migration_supported() && pmd_is_migration_entry(pmd)) { if (hmm_range_need_fault(hmm_vma_walk, hmm_pfns, npages, 0)) { hmm_vma_walk->last = addr; pmd_migration_entry_wait(walk->mm, pmdp); diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 40a8a2c1e080..5876595b00d5 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1374,7 +1374,7 @@ vm_fault_t do_huge_pmd_device_private(struct vm_fault *vmf) struct vm_area_struct *vma = vmf->vma; vm_fault_t ret = 0; spinlock_t *ptl; - swp_entry_t swp_entry; + softleaf_t entry; struct page *page; struct folio *folio; @@ -1389,8 +1389,8 @@ vm_fault_t do_huge_pmd_device_private(struct vm_fault *vmf) return 0; } - swp_entry = pmd_to_swp_entry(vmf->orig_pmd); - page = pfn_swap_entry_to_page(swp_entry); + entry = softleaf_from_pmd(vmf->orig_pmd); + page = softleaf_to_page(entry); folio = page_folio(page); vmf->page = page; vmf->pte = NULL; @@ -1780,13 +1780,13 @@ static void copy_huge_non_present_pmd( struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma, pmd_t pmd, pgtable_t pgtable) { - swp_entry_t entry = pmd_to_swp_entry(pmd); + softleaf_t entry = softleaf_from_pmd(pmd); struct folio *src_folio; - VM_WARN_ON(!is_pmd_non_present_folio_entry(pmd)); + VM_WARN_ON_ONCE(!pmd_is_valid_softleaf(pmd)); - if (is_writable_migration_entry(entry) 
|| - is_readable_exclusive_migration_entry(entry)) { + if (softleaf_is_migration_write(entry) || + softleaf_is_migration_read_exclusive(entry)) { entry = make_readable_migration_entry(swp_offset(entry)); pmd = swp_entry_to_pmd(entry); if (pmd_swp_soft_dirty(*src_pmd)) @@ -1794,12 +1794,12 @@ static void copy_huge_non_present_pmd( if (pmd_swp_uffd_wp(*src_pmd)) pmd = pmd_swp_mkuffd_wp(pmd); set_pmd_at(src_mm, addr, src_pmd, pmd); - } else if (is_device_private_entry(entry)) { + } else if (softleaf_is_device_private(entry)) { /* * For device private entries, since there are no * read exclusive entries, writable = !readable */ - if (is_writable_device_private_entry(entry)) { + if (softleaf_is_device_private_write(entry)) { entry = make_readable_device_private_entry(swp_offset(entry)); pmd = swp_entry_to_pmd(entry); @@ -1810,7 +1810,7 @@ static void copy_huge_non_present_pmd( set_pmd_at(src_mm, addr, src_pmd, pmd); } - src_folio = pfn_swap_entry_folio(entry); + src_folio = softleaf_to_folio(entry); VM_WARN_ON(!folio_test_large(src_folio)); folio_get(src_folio); @@ -2270,7 +2270,7 @@ bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, if (unlikely(!pmd_present(orig_pmd))) { VM_BUG_ON(thp_migration_supported() && - !is_pmd_migration_entry(orig_pmd)); + !pmd_is_migration_entry(orig_pmd)); goto out; } @@ -2368,11 +2368,10 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, folio_remove_rmap_pmd(folio, page, vma); WARN_ON_ONCE(folio_mapcount(folio) < 0); VM_BUG_ON_PAGE(!PageHead(page), page); - } else if (is_pmd_non_present_folio_entry(orig_pmd)) { - swp_entry_t entry; + } else if (pmd_is_valid_softleaf(orig_pmd)) { + const softleaf_t entry = softleaf_from_pmd(orig_pmd); - entry = pmd_to_swp_entry(orig_pmd); - folio = pfn_swap_entry_folio(entry); + folio = softleaf_to_folio(entry); flush_needed = 0; if (!thp_migration_supported()) @@ -2428,7 +2427,7 @@ static inline int pmd_move_must_withdraw(spinlock_t *new_pmd_ptl, static pmd_t move_soft_dirty_pmd(pmd_t pmd) { #ifdef CONFIG_MEM_SOFT_DIRTY - if (unlikely(is_pmd_migration_entry(pmd))) + if (unlikely(pmd_is_migration_entry(pmd))) pmd = pmd_swp_mksoft_dirty(pmd); else if (pmd_present(pmd)) pmd = pmd_mksoft_dirty(pmd); @@ -2503,12 +2502,12 @@ static void change_non_present_huge_pmd(struct mm_struct *mm, unsigned long addr, pmd_t *pmd, bool uffd_wp, bool uffd_wp_resolve) { - swp_entry_t entry = pmd_to_swp_entry(*pmd); - struct folio *folio = pfn_swap_entry_folio(entry); + softleaf_t entry = softleaf_from_pmd(*pmd); + const struct folio *folio = softleaf_to_folio(entry); pmd_t newpmd; - VM_WARN_ON(!is_pmd_non_present_folio_entry(*pmd)); - if (is_writable_migration_entry(entry)) { + VM_WARN_ON(!pmd_is_valid_softleaf(*pmd)); + if (softleaf_is_migration_write(entry)) { /* * A protection check is difficult so * just be safe and disable write @@ -2520,7 +2519,7 @@ static void change_non_present_huge_pmd(struct mm_struct *mm, newpmd = swp_entry_to_pmd(entry); if (pmd_swp_soft_dirty(*pmd)) newpmd = pmd_swp_mksoft_dirty(newpmd); - } else if (is_writable_device_private_entry(entry)) { + } else if (softleaf_is_device_private_write(entry)) { entry = make_readable_device_private_entry(swp_offset(entry)); newpmd = swp_entry_to_pmd(entry); } else { @@ -2718,7 +2717,7 @@ int move_pages_huge_pmd(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, pm if (!pmd_trans_huge(src_pmdval)) { spin_unlock(src_ptl); - if (is_pmd_migration_entry(src_pmdval)) { + if (pmd_is_migration_entry(src_pmdval)) { pmd_migration_entry_wait(mm, 
&src_pmdval); return -EAGAIN; } @@ -2983,13 +2982,12 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr; pte_t *pte; int i; - swp_entry_t entry; VM_BUG_ON(haddr & ~HPAGE_PMD_MASK); VM_BUG_ON_VMA(vma->vm_start > haddr, vma); VM_BUG_ON_VMA(vma->vm_end < haddr + HPAGE_PMD_SIZE, vma); - VM_WARN_ON(!is_pmd_non_present_folio_entry(*pmd) && !pmd_trans_huge(*pmd)); + VM_WARN_ON_ONCE(!pmd_is_valid_softleaf(*pmd) && !pmd_trans_huge(*pmd)); count_vm_event(THP_SPLIT_PMD); @@ -3003,11 +3001,10 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, zap_deposited_table(mm, pmd); if (!vma_is_dax(vma) && vma_is_special_huge(vma)) return; - if (unlikely(is_pmd_migration_entry(old_pmd))) { - swp_entry_t entry; + if (unlikely(pmd_is_migration_entry(old_pmd))) { + const softleaf_t old_entry = softleaf_from_pmd(old_pmd); - entry = pmd_to_swp_entry(old_pmd); - folio = pfn_swap_entry_folio(entry); + folio = softleaf_to_folio(old_entry); } else if (is_huge_zero_pmd(old_pmd)) { return; } else { @@ -3037,31 +3034,34 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, return __split_huge_zero_page_pmd(vma, haddr, pmd); } + if (pmd_is_migration_entry(*pmd)) { + softleaf_t entry; - if (is_pmd_migration_entry(*pmd)) { old_pmd = *pmd; - entry = pmd_to_swp_entry(old_pmd); - page = pfn_swap_entry_to_page(entry); + entry = softleaf_from_pmd(old_pmd); + page = softleaf_to_page(entry); folio = page_folio(page); soft_dirty = pmd_swp_soft_dirty(old_pmd); uffd_wp = pmd_swp_uffd_wp(old_pmd); - write = is_writable_migration_entry(entry); + write = softleaf_is_migration_write(entry); if (PageAnon(page)) - anon_exclusive = is_readable_exclusive_migration_entry(entry); - young = is_migration_entry_young(entry); - dirty = is_migration_entry_dirty(entry); - } else if (is_pmd_device_private_entry(*pmd)) { + anon_exclusive = softleaf_is_migration_read_exclusive(entry); + young = softleaf_is_migration_young(entry); + dirty = softleaf_is_migration_dirty(entry); + } else if (pmd_is_device_private_entry(*pmd)) { + softleaf_t entry; + old_pmd = *pmd; - entry = pmd_to_swp_entry(old_pmd); - page = pfn_swap_entry_to_page(entry); + entry = softleaf_from_pmd(old_pmd); + page = softleaf_to_page(entry); folio = page_folio(page); soft_dirty = pmd_swp_soft_dirty(old_pmd); uffd_wp = pmd_swp_uffd_wp(old_pmd); - write = is_writable_device_private_entry(entry); + write = softleaf_is_device_private_write(entry); anon_exclusive = PageAnonExclusive(page); /* @@ -3165,7 +3165,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, * Note that NUMA hinting access restrictions are not transferred to * avoid any possibility of altering permissions across VMAs. 
*/ - if (freeze || is_pmd_migration_entry(old_pmd)) { + if (freeze || pmd_is_migration_entry(old_pmd)) { pte_t entry; swp_entry_t swp_entry; @@ -3191,7 +3191,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, VM_WARN_ON(!pte_none(ptep_get(pte + i))); set_pte_at(mm, addr, pte + i, entry); } - } else if (is_pmd_device_private_entry(old_pmd)) { + } else if (pmd_is_device_private_entry(old_pmd)) { pte_t entry; swp_entry_t swp_entry; @@ -3241,7 +3241,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, } pte_unmap(pte); - if (!is_pmd_migration_entry(*pmd)) + if (!pmd_is_migration_entry(*pmd)) folio_remove_rmap_pmd(folio, page, vma); if (freeze) put_page(page); @@ -3254,7 +3254,7 @@ void split_huge_pmd_locked(struct vm_area_struct *vma, unsigned long address, pmd_t *pmd, bool freeze) { VM_WARN_ON_ONCE(!IS_ALIGNED(address, HPAGE_PMD_SIZE)); - if (pmd_trans_huge(*pmd) || is_pmd_non_present_folio_entry(*pmd)) + if (pmd_trans_huge(*pmd) || pmd_is_valid_softleaf(*pmd)) __split_huge_pmd_locked(vma, pmd, address, freeze); } @@ -4855,12 +4855,12 @@ void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new) unsigned long address = pvmw->address; unsigned long haddr = address & HPAGE_PMD_MASK; pmd_t pmde; - swp_entry_t entry; + softleaf_t entry; if (!(pvmw->pmd && !pvmw->pte)) return; - entry = pmd_to_swp_entry(*pvmw->pmd); + entry = softleaf_from_pmd(*pvmw->pmd); folio_get(folio); pmde = folio_mk_pmd(folio, READ_ONCE(vma->vm_page_prot)); @@ -4876,20 +4876,20 @@ void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new) if (pmd_swp_soft_dirty(*pvmw->pmd)) pmde = pmd_mksoft_dirty(pmde); - if (is_writable_migration_entry(entry)) + if (softleaf_is_migration_write(entry)) pmde = pmd_mkwrite(pmde, vma); if (pmd_swp_uffd_wp(*pvmw->pmd)) pmde = pmd_mkuffd_wp(pmde); - if (!is_migration_entry_young(entry)) + if (!softleaf_is_migration_young(entry)) pmde = pmd_mkold(pmde); /* NOTE: this may contain setting soft-dirty on some archs */ - if (folio_test_dirty(folio) && is_migration_entry_dirty(entry)) + if (folio_test_dirty(folio) && softleaf_is_migration_dirty(entry)) pmde = pmd_mkdirty(pmde); if (folio_test_anon(folio)) { rmap_t rmap_flags = RMAP_NONE; - if (!is_readable_migration_entry(entry)) + if (!softleaf_is_migration_read(entry)) rmap_flags |= RMAP_EXCLUSIVE; folio_add_anon_rmap_pmd(folio, new, vma, haddr, rmap_flags); diff --git a/mm/khugepaged.c b/mm/khugepaged.c index a97ff7bcb232..1a08673b0d8b 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include #include @@ -941,7 +941,7 @@ static inline int check_pmd_state(pmd_t *pmd) * collapse it. Migration success or failure will eventually end * up with a present PMD mapping a folio again. 
*/ - if (is_pmd_migration_entry(pmde)) + if (pmd_is_migration_entry(pmde)) return SCAN_PMD_MAPPED; if (!pmd_present(pmde)) return SCAN_PMD_NULL; diff --git a/mm/madvise.c b/mm/madvise.c index 58d82495b6c6..ffae3b566dc1 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -390,7 +390,7 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd, if (unlikely(!pmd_present(orig_pmd))) { VM_BUG_ON(thp_migration_supported() && - !is_pmd_migration_entry(orig_pmd)); + !pmd_is_migration_entry(orig_pmd)); goto huge_unlock; } diff --git a/mm/memory.c b/mm/memory.c index fea079e5fb90..bf2bbd0dbc97 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -6362,10 +6362,10 @@ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma, goto fallback; if (unlikely(!pmd_present(vmf.orig_pmd))) { - if (is_pmd_device_private_entry(vmf.orig_pmd)) + if (pmd_is_device_private_entry(vmf.orig_pmd)) return do_huge_pmd_device_private(&vmf); - if (is_pmd_migration_entry(vmf.orig_pmd)) + if (pmd_is_migration_entry(vmf.orig_pmd)) pmd_migration_entry_wait(mm, vmf.pmd); return 0; } diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 7ae3f5e2dee6..01c3b98f87a6 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -110,7 +110,7 @@ #include #include #include -#include +#include #include #include @@ -647,7 +647,7 @@ static void queue_folios_pmd(pmd_t *pmd, struct mm_walk *walk) struct folio *folio; struct queue_pages *qp = walk->private; - if (unlikely(is_pmd_migration_entry(*pmd))) { + if (unlikely(pmd_is_migration_entry(*pmd))) { qp->nr_failed++; return; } diff --git a/mm/migrate.c b/mm/migrate.c index 862b2e261cf9..3b6bd374157d 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include @@ -353,7 +353,7 @@ static bool remove_migration_pte(struct folio *folio, rmap_t rmap_flags = RMAP_NONE; pte_t old_pte; pte_t pte; - swp_entry_t entry; + softleaf_t entry; struct page *new; unsigned long idx = 0; @@ -379,22 +379,22 @@ static bool remove_migration_pte(struct folio *folio, folio_get(folio); pte = mk_pte(new, READ_ONCE(vma->vm_page_prot)); - entry = pte_to_swp_entry(old_pte); - if (!is_migration_entry_young(entry)) + entry = softleaf_from_pte(old_pte); + if (!softleaf_is_migration_young(entry)) pte = pte_mkold(pte); - if (folio_test_dirty(folio) && is_migration_entry_dirty(entry)) + if (folio_test_dirty(folio) && softleaf_is_migration_dirty(entry)) pte = pte_mkdirty(pte); if (pte_swp_soft_dirty(old_pte)) pte = pte_mksoft_dirty(pte); else pte = pte_clear_soft_dirty(pte); - if (is_writable_migration_entry(entry)) + if (softleaf_is_migration_write(entry)) pte = pte_mkwrite(pte, vma); else if (pte_swp_uffd_wp(old_pte)) pte = pte_mkuffd_wp(pte); - if (folio_test_anon(folio) && !is_readable_migration_entry(entry)) + if (folio_test_anon(folio) && !softleaf_is_migration_read(entry)) rmap_flags |= RMAP_EXCLUSIVE; if (unlikely(is_device_private_page(new))) { @@ -404,7 +404,7 @@ static bool remove_migration_pte(struct folio *folio, else entry = make_readable_device_private_entry( page_to_pfn(new)); - pte = swp_entry_to_pte(entry); + pte = softleaf_to_pte(entry); if (pte_swp_soft_dirty(old_pte)) pte = pte_swp_mksoft_dirty(pte); if (pte_swp_uffd_wp(old_pte)) @@ -543,9 +543,9 @@ void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd) spinlock_t *ptl; ptl = pmd_lock(mm, pmd); - if (!is_pmd_migration_entry(*pmd)) + if (!pmd_is_migration_entry(*pmd)) goto unlock; - migration_entry_wait_on_locked(pmd_to_swp_entry(*pmd), ptl); + 
migration_entry_wait_on_locked(softleaf_from_pmd(*pmd), ptl); return; unlock: spin_unlock(ptl); diff --git a/mm/migrate_device.c b/mm/migrate_device.c index c869b272e85a..880f26a316f8 100644 --- a/mm/migrate_device.c +++ b/mm/migrate_device.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include #include "internal.h" @@ -145,7 +145,6 @@ static int migrate_vma_collect_huge_pmd(pmd_t *pmdp, unsigned long start, struct folio *folio; struct migrate_vma *migrate = walk->private; spinlock_t *ptl; - swp_entry_t entry; int ret; unsigned long write = 0; @@ -169,23 +168,24 @@ static int migrate_vma_collect_huge_pmd(pmd_t *pmdp, unsigned long start, if (pmd_write(*pmdp)) write = MIGRATE_PFN_WRITE; } else if (!pmd_present(*pmdp)) { - entry = pmd_to_swp_entry(*pmdp); - folio = pfn_swap_entry_folio(entry); + const softleaf_t entry = softleaf_from_pmd(*pmdp); - if (!is_device_private_entry(entry) || + folio = softleaf_to_folio(entry); + + if (!softleaf_is_device_private(entry) || !(migrate->flags & MIGRATE_VMA_SELECT_DEVICE_PRIVATE) || (folio->pgmap->owner != migrate->pgmap_owner)) { spin_unlock(ptl); return migrate_vma_collect_skip(start, end, walk); } - if (is_migration_entry(entry)) { + if (softleaf_is_migration(entry)) { migration_entry_wait_on_locked(entry, ptl); spin_unlock(ptl); return -EAGAIN; } - if (is_writable_device_private_entry(entry)) + if (softleaf_is_device_private_write(entry)) write = MIGRATE_PFN_WRITE; } else { spin_unlock(ptl); diff --git a/mm/page_table_check.c b/mm/page_table_check.c index f5f25e120f69..9af1ecff5221 100644 --- a/mm/page_table_check.c +++ b/mm/page_table_check.c @@ -8,7 +8,7 @@ #include #include #include -#include +#include #undef pr_fmt #define pr_fmt(fmt) "page_table_check: " fmt @@ -179,10 +179,10 @@ void __page_table_check_pud_clear(struct mm_struct *mm, pud_t pud) EXPORT_SYMBOL(__page_table_check_pud_clear); /* Whether the swap entry cached writable information */ -static inline bool swap_cached_writable(swp_entry_t entry) +static inline bool softleaf_cached_writable(softleaf_t entry) { - return is_writable_device_private_entry(entry) || - is_writable_migration_entry(entry); + return softleaf_is_device_private(entry) || + softleaf_is_migration_write(entry); } static void page_table_check_pte_flags(pte_t pte) @@ -190,9 +190,9 @@ static void page_table_check_pte_flags(pte_t pte) if (pte_present(pte)) { WARN_ON_ONCE(pte_uffd_wp(pte) && pte_write(pte)); } else if (pte_swp_uffd_wp(pte)) { - const swp_entry_t entry = pte_to_swp_entry(pte); + const softleaf_t entry = softleaf_from_pte(pte); - WARN_ON_ONCE(swap_cached_writable(entry)); + WARN_ON_ONCE(softleaf_cached_writable(entry)); } } @@ -219,9 +219,9 @@ static inline void page_table_check_pmd_flags(pmd_t pmd) if (pmd_uffd_wp(pmd)) WARN_ON_ONCE(pmd_write(pmd)); } else if (pmd_swp_uffd_wp(pmd)) { - swp_entry_t entry = pmd_to_swp_entry(pmd); + const softleaf_t entry = softleaf_from_pmd(pmd); - WARN_ON_ONCE(swap_cached_writable(entry)); + WARN_ON_ONCE(softleaf_cached_writable(entry)); } } diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c index a4e23818f37f..8137d2366722 100644 --- a/mm/page_vma_mapped.c +++ b/mm/page_vma_mapped.c @@ -242,18 +242,19 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) */ pmde = pmdp_get_lockless(pvmw->pmd); - if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde)) { + if (pmd_trans_huge(pmde) || pmd_is_migration_entry(pmde)) { pvmw->ptl = pmd_lock(mm, pvmw->pmd); pmde = *pvmw->pmd; if (!pmd_present(pmde)) { - swp_entry_t entry; + 
softleaf_t entry; if (!thp_migration_supported() || !(pvmw->flags & PVMW_MIGRATION)) return not_found(pvmw); - entry = pmd_to_swp_entry(pmde); - if (!is_migration_entry(entry) || - !check_pmd(swp_offset_pfn(entry), pvmw)) + entry = softleaf_from_pmd(pmde); + + if (!softleaf_is_migration(entry) || + !check_pmd(softleaf_to_pfn(entry), pvmw)) return not_found(pvmw); return true; } @@ -273,9 +274,9 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) * cannot return prematurely, while zap_huge_pmd() has * cleared *pmd but not decremented compound_mapcount(). */ - swp_entry_t entry = pmd_to_swp_entry(pmde); + const softleaf_t entry = softleaf_from_pmd(pmde); - if (is_device_private_entry(entry)) { + if (softleaf_is_device_private(entry)) { pvmw->ptl = pmd_lock(mm, pvmw->pmd); return true; } diff --git a/mm/pagewalk.c b/mm/pagewalk.c index 9f91cf85a5be..3067feb970d1 100644 --- a/mm/pagewalk.c +++ b/mm/pagewalk.c @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include @@ -966,10 +966,10 @@ struct folio *folio_walk_start(struct folio_walk *fw, goto found; } } else if ((flags & FW_MIGRATION) && - is_pmd_migration_entry(pmd)) { - swp_entry_t entry = pmd_to_swp_entry(pmd); + pmd_is_migration_entry(pmd)) { + const softleaf_t entry = softleaf_from_pmd(pmd); - page = pfn_swap_entry_to_page(entry); + page = softleaf_to_page(entry); expose_page = false; goto found; } diff --git a/mm/rmap.c b/mm/rmap.c index 1954c538a991..775710115a41 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -57,7 +57,7 @@ #include #include #include -#include +#include #include #include #include @@ -2341,7 +2341,7 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma, if (likely(pmd_present(pmdval))) pfn = pmd_pfn(pmdval); else - pfn = swp_offset_pfn(pmd_to_swp_entry(pmdval)); + pfn = softleaf_to_pfn(softleaf_from_pmd(pmdval)); subpage = folio_page(folio, pfn - folio_pfn(folio)); -- 2.51.0 The leaf entry PMD case is confusing as only migration entries and device private entries are valid at PMD level, not true swap entries. We repeatedly perform checks of the form is_swap_pmd() || pmd_trans_huge() which is itself confusing - it implies that leaf entries at PMD level exist and are different from huge entries. Address this confusion by introducing pmd_is_huge() which checks for either case. Sadly, due to header dependency issues (huge_mm.h is included very early on in headers and cannot really rely on much else) we cannot use pmd_is_valid_softleaf() here. However, since these are the only valid, handled cases, the function still achieves what it intends to do. We then replace all instances of is_swap_pmd() || pmd_trans_huge() with pmd_is_huge() invocations and adjust the surrounding logic accordingly. No functional change intended. Signed-off-by: Lorenzo Stoakes --- include/linux/huge_mm.h | 39 +++++++++++++++++++++++++++++++++++---- include/linux/swapops.h | 6 ++++++ mm/huge_memory.c | 3 ++- mm/memory.c | 4 ++-- mm/mprotect.c | 2 +- mm/mremap.c | 2 +- 6 files changed, 47 insertions(+), 9 deletions(-) diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index cbb2243f8e56..403e13009631 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -419,10 +419,36 @@ void reparent_deferred_split_queue(struct mem_cgroup *memcg); void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, unsigned long address, bool freeze); +/** + * pmd_is_huge() - Is this PMD either a huge PMD entry or a software leaf entry? + * @pmd: The PMD to check.
+ * + * A huge PMD entry is a non-empty entry which is present and marked huge or a + * software leaf entry. This check can be performed without the appropriate locks + * held, in which case the condition should be rechecked after they are + * acquired. + * + * Returns: true if this PMD is huge, false otherwise. + */ +static inline bool pmd_is_huge(pmd_t pmd) +{ + if (pmd_present(pmd)) { + return pmd_trans_huge(pmd); + } else if (!pmd_none(pmd)) { + /* + * Non-present PMDs must be valid huge non-present entries. We + * cannot assert that here due to header dependency issues. + */ + return true; + } + + return false; +} + #define split_huge_pmd(__vma, __pmd, __address) \ do { \ pmd_t *____pmd = (__pmd); \ - if (is_swap_pmd(*____pmd) || pmd_trans_huge(*____pmd)) \ + if (pmd_is_huge(*____pmd)) \ __split_huge_pmd(__vma, __pmd, __address, \ false); \ } while (0) @@ -469,10 +495,10 @@ static inline int is_swap_pmd(pmd_t pmd) static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma) { - if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd)) + if (pmd_is_huge(*pmd)) return __pmd_trans_huge_lock(pmd, vma); - else - return NULL; + + return NULL; } static inline spinlock_t *pud_trans_huge_lock(pud_t *pud, struct vm_area_struct *vma) @@ -744,6 +770,11 @@ static inline struct folio *get_persistent_huge_zero_folio(void) { return NULL; } + +static inline bool pmd_is_huge(pmd_t pmd) +{ + return false; +} #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ static inline int split_folio_to_list_to_order(struct folio *folio, diff --git a/include/linux/swapops.h b/include/linux/swapops.h index f1277647262d..41cfc6d59054 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -471,6 +471,12 @@ static inline pmd_t swp_entry_to_pmd(swp_entry_t entry) } #else /* CONFIG_ARCH_ENABLE_THP_MIGRATION */ +static inline int set_pmd_migration_entry(struct page_vma_mapped_walk *pvmw, + struct page *page) +{ + BUILD_BUG(); +} + static inline void remove_migration_pmd(struct page_vma_mapped_walk *pvmw, struct page *new) { diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 5876595b00d5..2f0bdc987596 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2810,8 +2810,9 @@ int move_pages_huge_pmd(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, pm spinlock_t *__pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma) { spinlock_t *ptl; + ptl = pmd_lock(vma->vm_mm, pmd); - if (likely(is_swap_pmd(*pmd) || pmd_trans_huge(*pmd))) + if (likely(pmd_is_huge(*pmd))) return ptl; spin_unlock(ptl); return NULL; diff --git a/mm/memory.c b/mm/memory.c index bf2bbd0dbc97..087f31a291b4 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1374,7 +1374,7 @@ copy_pmd_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma, src_pmd = pmd_offset(src_pud, addr); do { next = pmd_addr_end(addr, end); - if (is_swap_pmd(*src_pmd) || pmd_trans_huge(*src_pmd)) { + if (pmd_is_huge(*src_pmd)) { int err; VM_BUG_ON_VMA(next-addr != HPAGE_PMD_SIZE, src_vma); @@ -1923,7 +1923,7 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb, pmd = pmd_offset(pud, addr); do { next = pmd_addr_end(addr, end); - if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd)) { + if (pmd_is_huge(*pmd)) { if (next - addr != HPAGE_PMD_SIZE) __split_huge_pmd(vma, pmd, addr, false); else if (zap_huge_pmd(tlb, vma, pmd, addr)) { diff --git a/mm/mprotect.c b/mm/mprotect.c index a3e360a8cdec..ab014ce17f9c 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -474,7 +474,7 @@ static inline long change_pmd_range(struct mmu_gather *tlb, goto next; _pmd
= pmdp_get_lockless(pmd); - if (is_swap_pmd(_pmd) || pmd_trans_huge(_pmd)) { + if (pmd_is_huge(_pmd)) { if ((next - addr != HPAGE_PMD_SIZE) || pgtable_split_needed(vma, cp_flags)) { __split_huge_pmd(vma, pmd, addr, false); diff --git a/mm/mremap.c b/mm/mremap.c index 62b6827abacf..fdb0485ede74 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -850,7 +850,7 @@ unsigned long move_page_tables(struct pagetable_move_control *pmc) if (!new_pmd) break; again: - if (is_swap_pmd(*old_pmd) || pmd_trans_huge(*old_pmd)) { + if (pmd_is_huge(*old_pmd)) { if (extent == HPAGE_PMD_SIZE && move_pgt_entry(pmc, HPAGE_PMD, old_pmd, new_pmd)) continue; -- 2.51.0 Update copy_huge_pmd() and change_huge_pmd() to use pmd_is_valid_softleaf() - as this checks for the only valid non-present huge PMD states. Also update mm/debug_vm_pgtable.c to explicitly test for a valid leaf PMD entry (which it was not before, which was incorrect), and have it test against pmd_is_huge() and pmd_is_valid_softleaf() rather than is_swap_pmd(). With these changes done there are no further users of is_swap_pmd(), so remove it. Signed-off-by: Lorenzo Stoakes --- include/linux/huge_mm.h | 9 --------- mm/debug_vm_pgtable.c | 25 +++++++++++++++---------- mm/huge_memory.c | 5 +++-- 3 files changed, 18 insertions(+), 21 deletions(-) diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 403e13009631..79f16b5aa5f0 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -486,11 +486,6 @@ void vma_adjust_trans_huge(struct vm_area_struct *vma, unsigned long start, spinlock_t *__pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma); spinlock_t *__pud_trans_huge_lock(pud_t *pud, struct vm_area_struct *vma); -static inline int is_swap_pmd(pmd_t pmd) -{ - return !pmd_none(pmd) && !pmd_present(pmd); -} - /* mmap_lock must be held on entry */ static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma) @@ -693,10 +688,6 @@ static inline void vma_adjust_trans_huge(struct vm_area_struct *vma, struct vm_area_struct *next) { } -static inline int is_swap_pmd(pmd_t pmd) -{ - return 0; -} static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma) { diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c index fff311830959..608d1011ce03 100644 --- a/mm/debug_vm_pgtable.c +++ b/mm/debug_vm_pgtable.c @@ -74,6 +74,7 @@ struct pgtable_debug_args { unsigned long fixed_pte_pfn; swp_entry_t swp_entry; + swp_entry_t leaf_entry; }; static void __init pte_basic_tests(struct pgtable_debug_args *args, int idx) @@ -745,7 +746,7 @@ static void __init pmd_soft_dirty_tests(struct pgtable_debug_args *args) WARN_ON(pmd_soft_dirty(pmd_clear_soft_dirty(pmd))); } -static void __init pmd_swap_soft_dirty_tests(struct pgtable_debug_args *args) +static void __init pmd_leaf_soft_dirty_tests(struct pgtable_debug_args *args) { pmd_t pmd; @@ -757,15 +758,16 @@ static void __init pmd_swap_soft_dirty_tests(struct pgtable_debug_args *args) return; pr_debug("Validating PMD swap soft dirty\n"); - pmd = swp_entry_to_pmd(args->swp_entry); - WARN_ON(!is_swap_pmd(pmd)); + pmd = swp_entry_to_pmd(args->leaf_entry); + WARN_ON(!pmd_is_huge(pmd)); + WARN_ON(!pmd_is_valid_softleaf(pmd)); WARN_ON(!pmd_swp_soft_dirty(pmd_swp_mksoft_dirty(pmd))); WARN_ON(pmd_swp_soft_dirty(pmd_swp_clear_soft_dirty(pmd))); } #else /* !CONFIG_TRANSPARENT_HUGEPAGE */ static void __init pmd_soft_dirty_tests(struct pgtable_debug_args *args) { } -static void __init pmd_swap_soft_dirty_tests(struct pgtable_debug_args *args) { } +static void __init 
pmd_leaf_soft_dirty_tests(struct pgtable_debug_args *args) { } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ static void __init pte_swap_exclusive_tests(struct pgtable_debug_args *args) @@ -818,7 +820,7 @@ static void __init pte_swap_tests(struct pgtable_debug_args *args) } #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION -static void __init pmd_swap_tests(struct pgtable_debug_args *args) +static void __init pmd_softleaf_tests(struct pgtable_debug_args *args) { swp_entry_t arch_entry; pmd_t pmd1, pmd2; @@ -827,15 +829,16 @@ static void __init pmd_swap_tests(struct pgtable_debug_args *args) return; pr_debug("Validating PMD swap\n"); - pmd1 = swp_entry_to_pmd(args->swp_entry); - WARN_ON(!is_swap_pmd(pmd1)); + pmd1 = swp_entry_to_pmd(args->leaf_entry); + WARN_ON(!pmd_is_huge(pmd1)); + WARN_ON(!pmd_is_valid_softleaf(pmd1)); arch_entry = __pmd_to_swp_entry(pmd1); pmd2 = __swp_entry_to_pmd(arch_entry); WARN_ON(memcmp(&pmd1, &pmd2, sizeof(pmd1))); } #else /* !CONFIG_ARCH_ENABLE_THP_MIGRATION */ -static void __init pmd_swap_tests(struct pgtable_debug_args *args) { } +static void __init pmd_softleaf_tests(struct pgtable_debug_args *args) { } #endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */ static void __init swap_migration_tests(struct pgtable_debug_args *args) @@ -1229,6 +1232,8 @@ static int __init init_args(struct pgtable_debug_args *args) max_swap_offset = swp_offset(pte_to_swp_entry(swp_entry_to_pte(swp_entry(0, ~0UL)))); /* Create a swp entry with all possible bits set while still being swap. */ args->swp_entry = swp_entry(MAX_SWAPFILES - 1, max_swap_offset); + /* Create a non-present migration entry. */ + args->leaf_entry = make_writable_migration_entry(~0UL); /* * Allocate (huge) pages because some of the tests need to access @@ -1318,12 +1323,12 @@ static int __init debug_vm_pgtable(void) pte_soft_dirty_tests(&args); pmd_soft_dirty_tests(&args); pte_swap_soft_dirty_tests(&args); - pmd_swap_soft_dirty_tests(&args); + pmd_leaf_soft_dirty_tests(&args); pte_swap_exclusive_tests(&args); pte_swap_tests(&args); - pmd_swap_tests(&args); + pmd_softleaf_tests(&args); swap_migration_tests(&args); diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 2f0bdc987596..d1a5c5f01d94 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1875,7 +1875,8 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, ret = -EAGAIN; pmd = *src_pmd; - if (unlikely(thp_migration_supported() && is_swap_pmd(pmd))) { + if (unlikely(thp_migration_supported() && + pmd_is_valid_softleaf(pmd))) { copy_huge_non_present_pmd(dst_mm, src_mm, dst_pmd, src_pmd, addr, dst_vma, src_vma, pmd, pgtable); ret = 0; @@ -2562,7 +2563,7 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, if (!ptl) return 0; - if (thp_migration_supported() && is_swap_pmd(*pmd)) { + if (thp_migration_supported() && pmd_is_valid_softleaf(*pmd)) { change_non_present_huge_pmd(mm, addr, pmd, uffd_wp, uffd_wp_resolve); goto unlock; -- 2.51.0 There is simply no need for the hugely confusing concept of 'non-swap' swap entries now we have the concept of softleaf entries and relevant softleaf_xxx() helpers. Adjust all callers to use these instead and remove non_swap_entry() altogether. No functional change intended. 
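For illustration, the dispatch pattern the converted callers follow now looks roughly like this (a minimal sketch - the wrapper function and the softleaf_action enum are invented for this example, device-exclusive entries are omitted for brevity, and only the softleaf_*() helpers come from this series):

enum softleaf_action {
	SOFTLEAF_ACTION_SWAP,		/* genuine swap entry, charged to MM_SWAPENTS */
	SOFTLEAF_ACTION_MIGRATION,	/* wait for migration to complete */
	SOFTLEAF_ACTION_DEVICE_PRIVATE,	/* fault the page back from device memory */
	SOFTLEAF_ACTION_HWPOISON,	/* report a poisoned page */
	SOFTLEAF_ACTION_MARKER,		/* UFFD WP, guard or poison markers */
};

static enum softleaf_action classify_nonpresent_pte(pte_t pte)
{
	const softleaf_t entry = softleaf_from_pte(pte);

	if (softleaf_is_swap(entry))		/* was: !non_swap_entry(entry) */
		return SOFTLEAF_ACTION_SWAP;
	if (softleaf_is_migration(entry))
		return SOFTLEAF_ACTION_MIGRATION;
	if (softleaf_is_device_private(entry))
		return SOFTLEAF_ACTION_DEVICE_PRIVATE;
	if (softleaf_is_hwpoison(entry))
		return SOFTLEAF_ACTION_HWPOISON;

	return SOFTLEAF_ACTION_MARKER;
}

The point is that "not a swap entry" is now expressed through the specific softleaf predicate a caller actually cares about, rather than through the double negative implied by non_swap_entry().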
Signed-off-by: Lorenzo Stoakes --- arch/s390/mm/gmap_helpers.c | 20 ++++++++++---------- arch/s390/mm/pgtable.c | 12 ++++++------ fs/proc/task_mmu.c | 12 ++++++------ include/linux/swapops.h | 5 ----- mm/filemap.c | 2 +- mm/hmm.c | 16 ++++++++-------- mm/madvise.c | 2 +- mm/memory.c | 36 ++++++++++++++++++------------------ mm/mincore.c | 2 +- mm/userfaultfd.c | 24 ++++++++++++------------ 10 files changed, 63 insertions(+), 68 deletions(-) diff --git a/arch/s390/mm/gmap_helpers.c b/arch/s390/mm/gmap_helpers.c index d4c3c36855e2..549f14ad08af 100644 --- a/arch/s390/mm/gmap_helpers.c +++ b/arch/s390/mm/gmap_helpers.c @@ -11,27 +11,27 @@ #include #include #include -#include +#include #include #include #include #include /** - * ptep_zap_swap_entry() - discard a swap entry. + * ptep_zap_softleaf_entry() - discard a software leaf entry. * @mm: the mm - * @entry: the swap entry that needs to be zapped + * @entry: the software leaf entry that needs to be zapped * - * Discards the given swap entry. If the swap entry was an actual swap - * entry (and not a migration entry, for example), the actual swapped + * Discards the given software leaf entry. If the leaf entry was an actual + * swap entry (and not a migration entry, for example), the actual swapped * page is also discarded from swap. */ -static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry) +static void ptep_zap_softleaf_entry(struct mm_struct *mm, softleaf_t entry) { - if (!non_swap_entry(entry)) + if (softleaf_is_swap(entry)) dec_mm_counter(mm, MM_SWAPENTS); - else if (is_migration_entry(entry)) - dec_mm_counter(mm, mm_counter(pfn_swap_entry_folio(entry))); + else if (softleaf_is_migration(entry)) + dec_mm_counter(mm, mm_counter(softleaf_to_folio(entry))); free_swap_and_cache(entry); } @@ -66,7 +66,7 @@ void gmap_helper_zap_one_page(struct mm_struct *mm, unsigned long vmaddr) preempt_disable(); pgste = pgste_get_lock(ptep); - ptep_zap_swap_entry(mm, pte_to_swp_entry(*ptep)); + ptep_zap_softleaf_entry(mm, softleaf_from_pte(*ptep)); pte_clear(mm, vmaddr, ptep); pgste_set_unlock(ptep, pgste); diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 0fde20bbc50b..d670bfb47d9b 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include @@ -683,12 +683,12 @@ void ptep_unshadow_pte(struct mm_struct *mm, unsigned long saddr, pte_t *ptep) pgste_set_unlock(ptep, pgste); } -static void ptep_zap_swap_entry(struct mm_struct *mm, swp_entry_t entry) +static void ptep_zap_softleaf_entry(struct mm_struct *mm, softleaf_t entry) { - if (!non_swap_entry(entry)) + if (softleaf_is_swap(entry)) dec_mm_counter(mm, MM_SWAPENTS); - else if (is_migration_entry(entry)) { - struct folio *folio = pfn_swap_entry_folio(entry); + else if (softleaf_is_migration(entry)) { + struct folio *folio = softleaf_to_folio(entry); dec_mm_counter(mm, mm_counter(folio)); } @@ -710,7 +710,7 @@ void ptep_zap_unused(struct mm_struct *mm, unsigned long addr, if (!reset && pte_swap(pte) && ((pgstev & _PGSTE_GPS_USAGE_MASK) == _PGSTE_GPS_USAGE_UNUSED || (pgstev & _PGSTE_GPS_ZERO))) { - ptep_zap_swap_entry(mm, pte_to_swp_entry(pte)); + ptep_zap_softleaf_entry(mm, softleaf_from_pte(pte)); pte_clear(mm, addr, ptep); } if (reset) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index d982fdfcf057..6cb9e1691e18 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1020,13 +1020,13 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr, } else if 
(pte_none(ptent)) { smaps_pte_hole_lookup(addr, walk); } else { - swp_entry_t swpent = pte_to_swp_entry(ptent); + const softleaf_t entry = softleaf_from_pte(ptent); - if (!non_swap_entry(swpent)) { + if (softleaf_is_swap(entry)) { int mapcount; mss->swap += PAGE_SIZE; - mapcount = swp_swapcount(swpent); + mapcount = swp_swapcount(entry); if (mapcount >= 2) { u64 pss_delta = (u64)PAGE_SIZE << PSS_SHIFT; @@ -1035,10 +1035,10 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr, } else { mss->swap_pss += (u64)PAGE_SIZE << PSS_SHIFT; } - } else if (is_pfn_swap_entry(swpent)) { - if (is_device_private_entry(swpent)) + } else if (softleaf_has_pfn(entry)) { + if (softleaf_is_device_private(entry)) present = true; - page = pfn_swap_entry_to_page(swpent); + page = softleaf_to_page(entry); } } diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 41cfc6d59054..c8e6f927da48 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -492,10 +492,5 @@ static inline pmd_t swp_entry_to_pmd(swp_entry_t entry) #endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */ -static inline int non_swap_entry(swp_entry_t entry) -{ - return swp_type(entry) >= MAX_SWAPFILES; -} - #endif /* CONFIG_MMU */ #endif /* _LINUX_SWAPOPS_H */ diff --git a/mm/filemap.c b/mm/filemap.c index 950d93885e38..ab8ff5b2fc3b 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -4566,7 +4566,7 @@ static void filemap_cachestat(struct address_space *mapping, swp_entry_t swp = radix_to_swp_entry(folio); /* swapin error results in poisoned entry */ - if (non_swap_entry(swp)) + if (!softleaf_is_swap(swp)) goto resched; /* diff --git a/mm/hmm.c b/mm/hmm.c index bc3fa699a4c6..d5c4e60fbfad 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -253,17 +253,17 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr, } if (!pte_present(pte)) { - swp_entry_t entry = pte_to_swp_entry(pte); + const softleaf_t entry = softleaf_from_pte(pte); /* * Don't fault in device private pages owned by the caller, * just report the PFN. */ - if (is_device_private_entry(entry) && - page_pgmap(pfn_swap_entry_to_page(entry))->owner == + if (softleaf_is_device_private(entry) && + page_pgmap(softleaf_to_page(entry))->owner == range->dev_private_owner) { cpu_flags = HMM_PFN_VALID; - if (is_writable_device_private_entry(entry)) + if (softleaf_is_device_private_write(entry)) cpu_flags |= HMM_PFN_WRITE; new_pfn_flags = swp_offset_pfn(entry) | cpu_flags; goto out; @@ -274,16 +274,16 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr, if (!required_fault) goto out; - if (!non_swap_entry(entry)) + if (softleaf_is_swap(entry)) goto fault; - if (is_device_private_entry(entry)) + if (softleaf_is_device_private(entry)) goto fault; - if (is_device_exclusive_entry(entry)) + if (softleaf_is_device_exclusive(entry)) goto fault; - if (is_migration_entry(entry)) { + if (softleaf_is_migration(entry)) { pte_unmap(ptep); hmm_vma_walk->last = addr; migration_entry_wait(walk->mm, pmdp, addr); diff --git a/mm/madvise.c b/mm/madvise.c index ffae3b566dc1..234178685793 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -249,7 +249,7 @@ static void shmem_swapin_range(struct vm_area_struct *vma, continue; entry = radix_to_swp_entry(folio); /* There might be swapin error entries in shmem mapping. 
*/ - if (non_swap_entry(entry)) + if (!softleaf_is_swap(entry)) continue; addr = vma->vm_start + diff --git a/mm/memory.c b/mm/memory.c index 087f31a291b4..ad336cbf1d88 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -932,7 +932,7 @@ copy_nonpresent_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, struct folio *folio; struct page *page; - if (likely(!non_swap_entry(entry))) { + if (likely(softleaf_is_swap(entry))) { if (swap_duplicate(entry) < 0) return -EIO; @@ -950,12 +950,12 @@ copy_nonpresent_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, set_pte_at(src_mm, addr, src_pte, pte); } rss[MM_SWAPENTS]++; - } else if (is_migration_entry(entry)) { - folio = pfn_swap_entry_folio(entry); + } else if (softleaf_is_migration(entry)) { + folio = softleaf_to_folio(entry); rss[mm_counter(folio)]++; - if (!is_readable_migration_entry(entry) && + if (!softleaf_is_migration_read(entry) && is_cow_mapping(vm_flags)) { /* * COW mappings require pages in both parent and child @@ -964,15 +964,15 @@ copy_nonpresent_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, */ entry = make_readable_migration_entry( swp_offset(entry)); - pte = swp_entry_to_pte(entry); + pte = softleaf_to_pte(entry); if (pte_swp_soft_dirty(orig_pte)) pte = pte_swp_mksoft_dirty(pte); if (pte_swp_uffd_wp(orig_pte)) pte = pte_swp_mkuffd_wp(pte); set_pte_at(src_mm, addr, src_pte, pte); } - } else if (is_device_private_entry(entry)) { - page = pfn_swap_entry_to_page(entry); + } else if (softleaf_is_device_private(entry)) { + page = softleaf_to_page(entry); folio = page_folio(page); /* @@ -996,7 +996,7 @@ copy_nonpresent_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, * when a device driver is involved (you cannot easily * save and restore device driver state). */ - if (is_writable_device_private_entry(entry) && + if (softleaf_is_device_private_write(entry) && is_cow_mapping(vm_flags)) { entry = make_readable_device_private_entry( swp_offset(entry)); @@ -1005,7 +1005,7 @@ copy_nonpresent_pte(struct mm_struct *dst_mm, struct mm_struct *src_mm, pte = pte_swp_mkuffd_wp(pte); set_pte_at(src_mm, addr, src_pte, pte); } - } else if (is_device_exclusive_entry(entry)) { + } else if (softleaf_is_device_exclusive(entry)) { /* * Make device exclusive entries present by restoring the * original entry then copying as for a present pte. 
Device @@ -4635,7 +4635,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) rmap_t rmap_flags = RMAP_NONE; bool need_clear_cache = false; bool exclusive = false; - swp_entry_t entry; + softleaf_t entry; pte_t pte; vm_fault_t ret = 0; void *shadow = NULL; @@ -4647,15 +4647,15 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) if (!pte_unmap_same(vmf)) goto out; - entry = pte_to_swp_entry(vmf->orig_pte); - if (unlikely(non_swap_entry(entry))) { - if (is_migration_entry(entry)) { + entry = softleaf_from_pte(vmf->orig_pte); + if (unlikely(!softleaf_is_swap(entry))) { + if (softleaf_is_migration(entry)) { migration_entry_wait(vma->vm_mm, vmf->pmd, vmf->address); - } else if (is_device_exclusive_entry(entry)) { - vmf->page = pfn_swap_entry_to_page(entry); + } else if (softleaf_is_device_exclusive(entry)) { + vmf->page = softleaf_to_page(entry); ret = remove_device_exclusive_entry(vmf); - } else if (is_device_private_entry(entry)) { + } else if (softleaf_is_device_private(entry)) { if (vmf->flags & FAULT_FLAG_VMA_LOCK) { /* * migrate_to_ram is not yet ready to operate @@ -4666,7 +4666,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) goto out; } - vmf->page = pfn_swap_entry_to_page(entry); + vmf->page = softleaf_to_page(entry); vmf->pte = pte_offset_map_lock(vma->vm_mm, vmf->pmd, vmf->address, &vmf->ptl); if (unlikely(!vmf->pte || @@ -4690,7 +4690,7 @@ vm_fault_t do_swap_page(struct vm_fault *vmf) } else { pte_unmap_unlock(vmf->pte, vmf->ptl); } - } else if (is_hwpoison_entry(entry)) { + } else if (softleaf_is_hwpoison(entry)) { ret = VM_FAULT_HWPOISON; } else if (softleaf_is_marker(entry)) { ret = handle_pte_marker(vmf); diff --git a/mm/mincore.c b/mm/mincore.c index e77c5bc88fc7..e1d50f198c42 100644 --- a/mm/mincore.c +++ b/mm/mincore.c @@ -74,7 +74,7 @@ static unsigned char mincore_swap(swp_entry_t entry, bool shmem) * absent. Page table may contain migration or hwpoison * entries which are always uptodate. */ - if (non_swap_entry(entry)) + if (!softleaf_is_swap(entry)) return !shmem; /* diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 055ec1050776..bd1f74a7a5ac 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -1256,7 +1256,6 @@ static long move_pages_ptes(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd unsigned long dst_addr, unsigned long src_addr, unsigned long len, __u64 mode) { - swp_entry_t entry; struct swap_info_struct *si = NULL; pte_t orig_src_pte, orig_dst_pte; pte_t src_folio_pte; @@ -1430,19 +1429,20 @@ static long move_pages_ptes(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd orig_dst_pte, orig_src_pte, dst_pmd, dst_pmdval, dst_ptl, src_ptl, &src_folio, len); - } else { + } else { /* !pte_present() */ struct folio *folio = NULL; + const softleaf_t entry = softleaf_from_pte(orig_src_pte); - entry = pte_to_swp_entry(orig_src_pte); - if (non_swap_entry(entry)) { - if (is_migration_entry(entry)) { - pte_unmap(src_pte); - pte_unmap(dst_pte); - src_pte = dst_pte = NULL; - migration_entry_wait(mm, src_pmd, src_addr); - ret = -EAGAIN; - } else - ret = -EFAULT; + if (softleaf_is_migration(entry)) { + pte_unmap(src_pte); + pte_unmap(dst_pte); + src_pte = dst_pte = NULL; + migration_entry_wait(mm, src_pmd, src_addr); + + ret = -EAGAIN; + goto out; + } else if (!softleaf_is_swap(entry)) { + ret = -EFAULT; goto out; } -- 2.51.0 We do not need to have explicit helper functions for these, it adds a level of confusion and indirection when we can simply use software leaf entry logic here instead and spell out the special huge_pte_none() case we must consider. 
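The shape each converted hugetlb call site takes is roughly the following (a minimal sketch - the wrapper function is invented purely for illustration; huge_ptep_get(), huge_pte_none(), softleaf_from_pte() and softleaf_is_migration() are the helpers actually used in the diff below):

static bool hugetlb_pte_is_migration(struct mm_struct *mm, unsigned long addr,
				     pte_t *ptep)
{
	const pte_t pte = huge_ptep_get(mm, addr, ptep);

	/* The none case is spelled out before decoding the leaf entry. */
	if (huge_pte_none(pte))
		return false;
	if (pte_present(pte))
		return false;

	return softleaf_is_migration(softleaf_from_pte(pte));
}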
No functional change intended. Signed-off-by: Lorenzo Stoakes --- fs/proc/task_mmu.c | 19 +++++---- include/linux/hugetlb.h | 2 - mm/hugetlb.c | 91 +++++++++++++++++------------------------ mm/mempolicy.c | 17 +++++--- mm/migrate.c | 15 +++++-- 5 files changed, 69 insertions(+), 75 deletions(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 6cb9e1691e18..3cdefa7546db 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -2499,22 +2499,23 @@ static void make_uffd_wp_huge_pte(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t ptent) { - unsigned long psize; + const unsigned long psize = huge_page_size(hstate_vma(vma)); + softleaf_t entry; - if (is_hugetlb_entry_hwpoisoned(ptent) || pte_is_marker(ptent)) - return; + if (huge_pte_none(ptent)) + set_huge_pte_at(vma->vm_mm, addr, ptep, + make_pte_marker(PTE_MARKER_UFFD_WP), psize); - psize = huge_page_size(hstate_vma(vma)); + entry = softleaf_from_pte(ptent); + if (softleaf_is_hwpoison(entry) || softleaf_is_marker(entry)) + return; - if (is_hugetlb_entry_migration(ptent)) + if (softleaf_is_migration(entry)) set_huge_pte_at(vma->vm_mm, addr, ptep, pte_swp_mkuffd_wp(ptent), psize); - else if (!huge_pte_none(ptent)) + else huge_ptep_modify_prot_commit(vma, addr, ptep, ptent, huge_pte_mkuffd_wp(ptent)); - else - set_huge_pte_at(vma->vm_mm, addr, ptep, - make_pte_marker(PTE_MARKER_UFFD_WP), psize); } #endif /* CONFIG_HUGETLB_PAGE */ diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 2387513d6ae5..457d48ac7bcd 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -274,8 +274,6 @@ void hugetlb_vma_lock_release(struct kref *kref); long hugetlb_change_protection(struct vm_area_struct *vma, unsigned long address, unsigned long end, pgprot_t newprot, unsigned long cp_flags); -bool is_hugetlb_entry_migration(pte_t pte); -bool is_hugetlb_entry_hwpoisoned(pte_t pte); void hugetlb_unshare_all_pmds(struct vm_area_struct *vma); void fixup_hugetlb_reservations(struct vm_area_struct *vma); void hugetlb_split(struct vm_area_struct *vma, unsigned long addr); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index a74cde267c2a..b702b161ab35 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -5552,32 +5552,6 @@ static void set_huge_ptep_maybe_writable(struct vm_area_struct *vma, set_huge_ptep_writable(vma, address, ptep); } -bool is_hugetlb_entry_migration(pte_t pte) -{ - swp_entry_t swp; - - if (huge_pte_none(pte) || pte_present(pte)) - return false; - swp = pte_to_swp_entry(pte); - if (is_migration_entry(swp)) - return true; - else - return false; -} - -bool is_hugetlb_entry_hwpoisoned(pte_t pte) -{ - swp_entry_t swp; - - if (huge_pte_none(pte) || pte_present(pte)) - return false; - swp = pte_to_swp_entry(pte); - if (is_hwpoison_entry(swp)) - return true; - else - return false; -} - static void hugetlb_install_folio(struct vm_area_struct *vma, pte_t *ptep, unsigned long addr, struct folio *new_folio, pte_t old, unsigned long sz) @@ -5606,6 +5580,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, unsigned long npages = pages_per_huge_page(h); struct mmu_notifier_range range; unsigned long last_addr_mask; + softleaf_t softleaf; int ret = 0; if (cow) { @@ -5653,16 +5628,16 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, entry = huge_ptep_get(src_vma->vm_mm, addr, src_pte); again: if (huge_pte_none(entry)) { - /* - * Skip if src entry none. - */ - ; - } else if (unlikely(is_hugetlb_entry_hwpoisoned(entry))) { + /* Skip if src entry none. 
*/ + goto next; + } + + softleaf = softleaf_from_pte(entry); + if (unlikely(softleaf_is_hwpoison(softleaf))) { if (!userfaultfd_wp(dst_vma)) entry = huge_pte_clear_uffd_wp(entry); set_huge_pte_at(dst, addr, dst_pte, entry, sz); - } else if (unlikely(is_hugetlb_entry_migration(entry))) { - softleaf_t softleaf = softleaf_from_pte(entry); + } else if (unlikely(softleaf_is_migration(softleaf))) { bool uffd_wp = pte_swp_uffd_wp(entry); if (!is_readable_migration_entry(softleaf) && cow) { @@ -5681,7 +5656,6 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, entry = huge_pte_clear_uffd_wp(entry); set_huge_pte_at(dst, addr, dst_pte, entry, sz); } else if (unlikely(pte_is_marker(entry))) { - const softleaf_t softleaf = softleaf_from_pte(entry); const pte_marker marker = copy_pte_marker(softleaf, dst_vma); if (marker) @@ -5739,9 +5713,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, } hugetlb_install_folio(dst_vma, dst_pte, addr, new_folio, src_pte_old, sz); - spin_unlock(src_ptl); - spin_unlock(dst_ptl); - continue; + goto next; } if (cow) { @@ -5762,6 +5734,8 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, set_huge_pte_at(dst, addr, dst_pte, entry, sz); hugetlb_count_add(npages, dst); } + +next: spin_unlock(src_ptl); spin_unlock(dst_ptl); } @@ -6770,8 +6744,10 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, ret = 0; /* Not present, either a migration or a hwpoisoned entry */ - if (!pte_present(vmf.orig_pte)) { - if (is_hugetlb_entry_migration(vmf.orig_pte)) { + if (!pte_present(vmf.orig_pte) && !huge_pte_none(vmf.orig_pte)) { + const softleaf_t softleaf = softleaf_from_pte(vmf.orig_pte); + + if (softleaf_is_migration(softleaf)) { /* * Release the hugetlb fault lock now, but retain * the vma lock, because it is needed to guard the @@ -6782,9 +6758,12 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, mutex_unlock(&hugetlb_fault_mutex_table[hash]); migration_entry_wait_huge(vma, vmf.address, vmf.pte); return 0; - } else if (is_hugetlb_entry_hwpoisoned(vmf.orig_pte)) + } + if (softleaf_is_hwpoison(softleaf)) { ret = VM_FAULT_HWPOISON_LARGE | VM_FAULT_SET_HINDEX(hstate_index(h)); + } + goto out_mutex; } @@ -7166,7 +7145,9 @@ long hugetlb_change_protection(struct vm_area_struct *vma, i_mmap_lock_write(vma->vm_file->f_mapping); last_addr_mask = hugetlb_mask_last_page(h); for (; address < end; address += psize) { + softleaf_t entry; spinlock_t *ptl; + ptep = hugetlb_walk(vma, address, psize); if (!ptep) { if (!uffd_wp) { @@ -7198,15 +7179,23 @@ long hugetlb_change_protection(struct vm_area_struct *vma, continue; } pte = huge_ptep_get(mm, address, ptep); - if (unlikely(is_hugetlb_entry_hwpoisoned(pte))) { - /* Nothing to do. */ - } else if (unlikely(is_hugetlb_entry_migration(pte))) { - softleaf_t entry = softleaf_from_pte(pte); + if (huge_pte_none(pte)) { + if (unlikely(uffd_wp)) + /* Safe to modify directly (none->non-present). */ + set_huge_pte_at(mm, address, ptep, + make_pte_marker(PTE_MARKER_UFFD_WP), + psize); + goto next; + } + entry = softleaf_from_pte(pte); + if (unlikely(softleaf_is_hwpoison(entry))) { + /* Nothing to do. 
*/ + } else if (unlikely(softleaf_is_migration(entry))) { struct folio *folio = softleaf_to_folio(entry); pte_t newpte = pte; - if (is_writable_migration_entry(entry)) { + if (softleaf_is_migration_write(entry)) { if (folio_test_anon(folio)) entry = make_readable_exclusive_migration_entry( swp_offset(entry)); @@ -7233,7 +7222,7 @@ long hugetlb_change_protection(struct vm_area_struct *vma, if (pte_is_uffd_wp_marker(pte) && uffd_wp_resolve) /* Safe to modify directly (non-present->none). */ huge_pte_clear(mm, address, ptep, psize); - } else if (!huge_pte_none(pte)) { + } else { pte_t old_pte; unsigned int shift = huge_page_shift(hstate_vma(vma)); @@ -7246,16 +7235,10 @@ long hugetlb_change_protection(struct vm_area_struct *vma, pte = huge_pte_clear_uffd_wp(pte); huge_ptep_modify_prot_commit(vma, address, ptep, old_pte, pte); pages++; - } else { - /* None pte */ - if (unlikely(uffd_wp)) - /* Safe to modify directly (none->non-present). */ - set_huge_pte_at(mm, address, ptep, - make_pte_marker(PTE_MARKER_UFFD_WP), - psize); } - spin_unlock(ptl); +next: + spin_unlock(ptl); cond_resched(); } /* diff --git a/mm/mempolicy.c b/mm/mempolicy.c index 01c3b98f87a6..dee95d5ecfd4 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -768,16 +768,21 @@ static int queue_folios_hugetlb(pte_t *pte, unsigned long hmask, unsigned long flags = qp->flags; struct folio *folio; spinlock_t *ptl; - pte_t entry; + pte_t ptep; ptl = huge_pte_lock(hstate_vma(walk->vma), walk->mm, pte); - entry = huge_ptep_get(walk->mm, addr, pte); - if (!pte_present(entry)) { - if (unlikely(is_hugetlb_entry_migration(entry))) - qp->nr_failed++; + ptep = huge_ptep_get(walk->mm, addr, pte); + if (!pte_present(ptep)) { + if (!huge_pte_none(ptep)) { + const softleaf_t entry = softleaf_from_pte(ptep); + + if (unlikely(softleaf_is_migration(entry))) + qp->nr_failed++; + } + goto unlock; } - folio = pfn_folio(pte_pfn(entry)); + folio = pfn_folio(pte_pfn(ptep)); if (!queue_folio_required(folio, qp)) goto unlock; if (!(flags & (MPOL_MF_MOVE | MPOL_MF_MOVE_ALL)) || diff --git a/mm/migrate.c b/mm/migrate.c index 3b6bd374157d..48f98a6c1ad2 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -515,16 +515,18 @@ void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, void migration_entry_wait_huge(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep) { spinlock_t *ptl = huge_pte_lockptr(hstate_vma(vma), vma->vm_mm, ptep); + softleaf_t entry; pte_t pte; hugetlb_vma_assert_locked(vma); spin_lock(ptl); pte = huge_ptep_get(vma->vm_mm, addr, ptep); - if (unlikely(!is_hugetlb_entry_migration(pte))) { - spin_unlock(ptl); - hugetlb_vma_unlock_read(vma); - } else { + if (huge_pte_none(pte)) + goto fail; + + entry = softleaf_from_pte(pte); + if (softleaf_is_migration(entry)) { /* * If migration entry existed, safe to release vma lock * here because the pgtable page won't be freed without the @@ -533,7 +535,12 @@ void migration_entry_wait_huge(struct vm_area_struct *vma, unsigned long addr, p */ hugetlb_vma_unlock_read(vma); migration_entry_wait_on_locked(pte_to_swp_entry(pte), ptl); + return; } + +fail: + spin_unlock(ptl); + hugetlb_vma_unlock_read(vma); } #endif -- 2.51.0 Having converted so much of the code base to software leaf entries, we can mop up some remaining cases. We replace is_pfn_swap_entry(), pfn_swap_entry_to_page(), is_writable_device_private_entry(), is_device_exclusive_entry(), is_migration_entry(), is_writable_migration_entry(), is_readable_migration_entry(), swp_offset_pfn() and pfn_swap_entry_folio() with softleaf equivalents. 
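As a rough guide to the conversion (a sketch only - the wrapper function below is not part of the patch; softleaf_from_pte(), softleaf_has_pfn() and softleaf_to_page() are the real helpers):

static struct page *nonpresent_pte_to_page(pte_t pte)
{
	const softleaf_t entry = softleaf_from_pte(pte);

	/*
	 * Only migration, device private/exclusive and hwpoison entries
	 * carry a PFN; this was previously is_pfn_swap_entry().
	 */
	if (!softleaf_has_pfn(entry))
		return NULL;

	/* Previously pfn_swap_entry_to_page(). */
	return softleaf_to_page(entry);
}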
No functional change intended. Signed-off-by: Lorenzo Stoakes --- fs/proc/task_mmu.c | 14 ++--- include/linux/leafops.h | 25 +++++++-- include/linux/swapops.h | 121 +--------------------------------------- mm/debug_vm_pgtable.c | 20 +++---- mm/hmm.c | 2 +- mm/hugetlb.c | 2 +- mm/ksm.c | 6 +- mm/memory-failure.c | 6 +- mm/memory.c | 3 +- mm/mempolicy.c | 4 +- mm/migrate.c | 6 +- mm/migrate_device.c | 10 ++-- mm/mprotect.c | 8 +-- mm/page_vma_mapped.c | 8 +-- mm/pagewalk.c | 7 +-- mm/rmap.c | 9 ++- 16 files changed, 75 insertions(+), 176 deletions(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 3cdefa7546db..4deded872c46 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1940,13 +1940,13 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm, if (pte_uffd_wp(pte)) flags |= PM_UFFD_WP; } else { - swp_entry_t entry; + softleaf_t entry; if (pte_swp_soft_dirty(pte)) flags |= PM_SOFT_DIRTY; if (pte_swp_uffd_wp(pte)) flags |= PM_UFFD_WP; - entry = pte_to_swp_entry(pte); + entry = softleaf_from_pte(pte); if (pm->show_pfn) { pgoff_t offset; @@ -1954,16 +1954,16 @@ static pagemap_entry_t pte_to_pagemap_entry(struct pagemapread *pm, * For PFN swap offsets, keeping the offset field * to be PFN only to be compatible with old smaps. */ - if (is_pfn_swap_entry(entry)) - offset = swp_offset_pfn(entry); + if (softleaf_has_pfn(entry)) + offset = softleaf_to_pfn(entry); else offset = swp_offset(entry); frame = swp_type(entry) | (offset << MAX_SWAPFILES_SHIFT); } flags |= PM_SWAP; - if (is_pfn_swap_entry(entry)) - page = pfn_swap_entry_to_page(entry); + if (softleaf_has_pfn(entry)) + page = softleaf_to_page(entry); if (softleaf_is_uffd_wp_marker(entry)) flags |= PM_UFFD_WP; if (softleaf_is_guard_marker(entry)) @@ -2032,7 +2032,7 @@ static int pagemap_pmd_range_thp(pmd_t *pmdp, unsigned long addr, if (pmd_swp_uffd_wp(pmd)) flags |= PM_UFFD_WP; VM_WARN_ON_ONCE(!pmd_is_migration_entry(pmd)); - page = pfn_swap_entry_to_page(entry); + page = softleaf_to_page(entry); } if (page) { diff --git a/include/linux/leafops.h b/include/linux/leafops.h index 9be9a4e8ada4..d593093ba70c 100644 --- a/include/linux/leafops.h +++ b/include/linux/leafops.h @@ -355,7 +355,7 @@ static inline unsigned long softleaf_to_pfn(softleaf_t entry) VM_WARN_ON_ONCE(!softleaf_has_pfn(entry)); /* Temporary until swp_entry_t eliminated. */ - return swp_offset_pfn(entry); + return swp_offset(entry) & SWP_PFN_MASK; } /** @@ -366,10 +366,16 @@ static inline unsigned long softleaf_to_pfn(softleaf_t entry) */ static inline struct page *softleaf_to_page(softleaf_t entry) { + struct page *page = pfn_to_page(softleaf_to_pfn(entry)); + VM_WARN_ON_ONCE(!softleaf_has_pfn(entry)); + /* + * Any use of migration entries may only occur while the + * corresponding page is locked + */ + VM_WARN_ON_ONCE(softleaf_is_migration(entry) && !PageLocked(page)); - /* Temporary until swp_entry_t eliminated. */ - return pfn_swap_entry_to_page(entry); + return page; } /** @@ -381,10 +387,17 @@ static inline struct page *softleaf_to_page(softleaf_t entry) */ static inline struct folio *softleaf_to_folio(softleaf_t entry) { - VM_WARN_ON_ONCE(!softleaf_has_pfn(entry)); + struct folio *folio = pfn_folio(softleaf_to_pfn(entry)); - /* Temporary until swp_entry_t eliminated. */ - return pfn_swap_entry_folio(entry); + VM_WARN_ON_ONCE(!softleaf_has_pfn(entry)); + /* + * Any use of migration entries may only occur while the + * corresponding folio is locked. 
+ */ + VM_WARN_ON_ONCE(softleaf_is_migration(entry) && + !folio_test_locked(folio)); + + return folio; } /** diff --git a/include/linux/swapops.h b/include/linux/swapops.h index c8e6f927da48..3d02b288c15e 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -28,7 +28,7 @@ #define SWP_OFFSET_MASK ((1UL << SWP_TYPE_SHIFT) - 1) /* - * Definitions only for PFN swap entries (see is_pfn_swap_entry()). To + * Definitions only for PFN swap entries (see leafeant_has_pfn()). To * store PFN, we only need SWP_PFN_BITS bits. Each of the pfn swap entries * can use the extra bits to store other information besides PFN. */ @@ -66,8 +66,6 @@ #define SWP_MIG_YOUNG BIT(SWP_MIG_YOUNG_BIT) #define SWP_MIG_DIRTY BIT(SWP_MIG_DIRTY_BIT) -static inline bool is_pfn_swap_entry(swp_entry_t entry); - /* Clear all flags but only keep swp_entry_t related information */ static inline pte_t pte_swp_clear_flags(pte_t pte) { @@ -109,17 +107,6 @@ static inline pgoff_t swp_offset(swp_entry_t entry) return entry.val & SWP_OFFSET_MASK; } -/* - * This should only be called upon a pfn swap entry to get the PFN stored - * in the swap entry. Please refers to is_pfn_swap_entry() for definition - * of pfn swap entry. - */ -static inline unsigned long swp_offset_pfn(swp_entry_t entry) -{ - VM_BUG_ON(!is_pfn_swap_entry(entry)); - return swp_offset(entry) & SWP_PFN_MASK; -} - /* * Convert the arch-dependent pte representation of a swp_entry_t into an * arch-independent swp_entry_t. @@ -169,27 +156,11 @@ static inline swp_entry_t make_writable_device_private_entry(pgoff_t offset) return swp_entry(SWP_DEVICE_WRITE, offset); } -static inline bool is_device_private_entry(swp_entry_t entry) -{ - int type = swp_type(entry); - return type == SWP_DEVICE_READ || type == SWP_DEVICE_WRITE; -} - -static inline bool is_writable_device_private_entry(swp_entry_t entry) -{ - return unlikely(swp_type(entry) == SWP_DEVICE_WRITE); -} - static inline swp_entry_t make_device_exclusive_entry(pgoff_t offset) { return swp_entry(SWP_DEVICE_EXCLUSIVE, offset); } -static inline bool is_device_exclusive_entry(swp_entry_t entry) -{ - return swp_type(entry) == SWP_DEVICE_EXCLUSIVE; -} - #else /* CONFIG_DEVICE_PRIVATE */ static inline swp_entry_t make_readable_device_private_entry(pgoff_t offset) { @@ -201,50 +172,14 @@ static inline swp_entry_t make_writable_device_private_entry(pgoff_t offset) return swp_entry(0, 0); } -static inline bool is_device_private_entry(swp_entry_t entry) -{ - return false; -} - -static inline bool is_writable_device_private_entry(swp_entry_t entry) -{ - return false; -} - static inline swp_entry_t make_device_exclusive_entry(pgoff_t offset) { return swp_entry(0, 0); } -static inline bool is_device_exclusive_entry(swp_entry_t entry) -{ - return false; -} - #endif /* CONFIG_DEVICE_PRIVATE */ #ifdef CONFIG_MIGRATION -static inline int is_migration_entry(swp_entry_t entry) -{ - return unlikely(swp_type(entry) == SWP_MIGRATION_READ || - swp_type(entry) == SWP_MIGRATION_READ_EXCLUSIVE || - swp_type(entry) == SWP_MIGRATION_WRITE); -} - -static inline int is_writable_migration_entry(swp_entry_t entry) -{ - return unlikely(swp_type(entry) == SWP_MIGRATION_WRITE); -} - -static inline int is_readable_migration_entry(swp_entry_t entry) -{ - return unlikely(swp_type(entry) == SWP_MIGRATION_READ); -} - -static inline int is_readable_exclusive_migration_entry(swp_entry_t entry) -{ - return unlikely(swp_type(entry) == SWP_MIGRATION_READ_EXCLUSIVE); -} static inline swp_entry_t make_readable_migration_entry(pgoff_t offset) { @@ -310,23 
+245,10 @@ static inline swp_entry_t make_writable_migration_entry(pgoff_t offset) return swp_entry(0, 0); } -static inline int is_migration_entry(swp_entry_t swp) -{ - return 0; -} - static inline void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, unsigned long address) { } static inline void migration_entry_wait_huge(struct vm_area_struct *vma, unsigned long addr, pte_t *pte) { } -static inline int is_writable_migration_entry(swp_entry_t entry) -{ - return 0; -} -static inline int is_readable_migration_entry(swp_entry_t entry) -{ - return 0; -} static inline swp_entry_t make_migration_entry_young(swp_entry_t entry) { @@ -410,47 +332,6 @@ static inline swp_entry_t make_guard_swp_entry(void) return make_pte_marker_entry(PTE_MARKER_GUARD); } -static inline struct page *pfn_swap_entry_to_page(swp_entry_t entry) -{ - struct page *p = pfn_to_page(swp_offset_pfn(entry)); - - /* - * Any use of migration entries may only occur while the - * corresponding page is locked - */ - BUG_ON(is_migration_entry(entry) && !PageLocked(p)); - - return p; -} - -static inline struct folio *pfn_swap_entry_folio(swp_entry_t entry) -{ - struct folio *folio = pfn_folio(swp_offset_pfn(entry)); - - /* - * Any use of migration entries may only occur while the - * corresponding folio is locked - */ - BUG_ON(is_migration_entry(entry) && !folio_test_locked(folio)); - - return folio; -} - -/* - * A pfn swap entry is a special type of swap entry that always has a pfn stored - * in the swap offset. They can either be used to represent unaddressable device - * memory, to restrict access to a page undergoing migration or to represent a - * pfn which has been hwpoisoned and unmapped. - */ -static inline bool is_pfn_swap_entry(swp_entry_t entry) -{ - /* Make sure the swp offset can always store the needed fields */ - BUILD_BUG_ON(SWP_TYPE_SHIFT < SWP_PFN_BITS); - - return is_migration_entry(entry) || is_device_private_entry(entry) || - is_device_exclusive_entry(entry) || is_hwpoison_entry(entry); -} - struct page_vma_mapped_walk; #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c index 608d1011ce03..64db85a80558 100644 --- a/mm/debug_vm_pgtable.c +++ b/mm/debug_vm_pgtable.c @@ -844,7 +844,7 @@ static void __init pmd_softleaf_tests(struct pgtable_debug_args *args) { } static void __init swap_migration_tests(struct pgtable_debug_args *args) { struct page *page; - swp_entry_t swp; + softleaf_t entry; if (!IS_ENABLED(CONFIG_MIGRATION)) return; @@ -867,17 +867,17 @@ static void __init swap_migration_tests(struct pgtable_debug_args *args) * be locked, otherwise it stumbles upon a BUG_ON(). 
*/ __SetPageLocked(page); - swp = make_writable_migration_entry(page_to_pfn(page)); - WARN_ON(!is_migration_entry(swp)); - WARN_ON(!is_writable_migration_entry(swp)); + entry = make_writable_migration_entry(page_to_pfn(page)); + WARN_ON(!softleaf_is_migration(entry)); + WARN_ON(!softleaf_is_migration_write(entry)); - swp = make_readable_migration_entry(swp_offset(swp)); - WARN_ON(!is_migration_entry(swp)); - WARN_ON(is_writable_migration_entry(swp)); + entry = make_readable_migration_entry(swp_offset(entry)); + WARN_ON(!softleaf_is_migration(entry)); + WARN_ON(softleaf_is_migration_write(entry)); - swp = make_readable_migration_entry(page_to_pfn(page)); - WARN_ON(!is_migration_entry(swp)); - WARN_ON(is_writable_migration_entry(swp)); + entry = make_readable_migration_entry(page_to_pfn(page)); + WARN_ON(!softleaf_is_migration(entry)); + WARN_ON(softleaf_is_migration_write(entry)); __ClearPageLocked(page); } diff --git a/mm/hmm.c b/mm/hmm.c index d5c4e60fbfad..f91c38d4507a 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -265,7 +265,7 @@ static int hmm_vma_handle_pte(struct mm_walk *walk, unsigned long addr, cpu_flags = HMM_PFN_VALID; if (softleaf_is_device_private_write(entry)) cpu_flags |= HMM_PFN_WRITE; - new_pfn_flags = swp_offset_pfn(entry) | cpu_flags; + new_pfn_flags = softleaf_to_pfn(entry) | cpu_flags; goto out; } diff --git a/mm/hugetlb.c b/mm/hugetlb.c index b702b161ab35..f7f18a3ea495 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -5640,7 +5640,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, } else if (unlikely(softleaf_is_migration(softleaf))) { bool uffd_wp = pte_swp_uffd_wp(entry); - if (!is_readable_migration_entry(softleaf) && cow) { + if (!softleaf_is_migration_read(softleaf) && cow) { /* * COW mappings require pages in both * parent and child to be set to read. diff --git a/mm/ksm.c b/mm/ksm.c index 7cd19a6ce45f..b911df37f04e 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -637,14 +637,14 @@ static int break_ksm_pmd_entry(pmd_t *pmdp, unsigned long addr, unsigned long en if (pte_present(pte)) { folio = vm_normal_folio(walk->vma, addr, pte); } else if (!pte_none(pte)) { - swp_entry_t entry = pte_to_swp_entry(pte); + const softleaf_t entry = softleaf_from_pte(pte); /* * As KSM pages remain KSM pages until freed, no need to wait * here for migration to end. 
*/ - if (is_migration_entry(entry)) - folio = pfn_swap_entry_folio(entry); + if (softleaf_is_migration(entry)) + folio = softleaf_to_folio(entry); } /* return 1 if the page is an normal ksm page or KSM-placed zero page */ found = (folio && folio_test_ksm(folio)) || is_ksm_zero_pte(pte); diff --git a/mm/memory-failure.c b/mm/memory-failure.c index acc35c881547..6e79da3de221 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -691,10 +691,10 @@ static int check_hwpoisoned_entry(pte_t pte, unsigned long addr, short shift, if (pte_present(pte)) { pfn = pte_pfn(pte); } else { - swp_entry_t swp = pte_to_swp_entry(pte); + const softleaf_t entry = softleaf_from_pte(pte); - if (is_hwpoison_entry(swp)) - pfn = swp_offset_pfn(swp); + if (softleaf_is_hwpoison(entry)) + pfn = softleaf_to_pfn(entry); } if (!pfn || pfn != poisoned_pfn) diff --git a/mm/memory.c b/mm/memory.c index ad336cbf1d88..accd275cd651 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -902,7 +902,8 @@ static void restore_exclusive_pte(struct vm_area_struct *vma, static int try_restore_exclusive_pte(struct vm_area_struct *vma, unsigned long addr, pte_t *ptep, pte_t orig_pte) { - struct page *page = pfn_swap_entry_to_page(pte_to_swp_entry(orig_pte)); + const softleaf_t entry = softleaf_from_pte(orig_pte); + struct page *page = softleaf_to_page(entry); struct folio *folio = page_folio(page); if (folio_trylock(folio)) { diff --git a/mm/mempolicy.c b/mm/mempolicy.c index dee95d5ecfd4..acb9bf89f619 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -705,7 +705,9 @@ static int queue_folios_pte_range(pmd_t *pmd, unsigned long addr, if (pte_none(ptent)) continue; if (!pte_present(ptent)) { - if (is_migration_entry(pte_to_swp_entry(ptent))) + const softleaf_t entry = softleaf_from_pte(ptent); + + if (softleaf_is_migration(entry)) qp->nr_failed++; continue; } diff --git a/mm/migrate.c b/mm/migrate.c index 48f98a6c1ad2..182a5b7b2ead 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -483,7 +483,7 @@ void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, spinlock_t *ptl; pte_t *ptep; pte_t pte; - swp_entry_t entry; + softleaf_t entry; ptep = pte_offset_map_lock(mm, pmd, address, &ptl); if (!ptep) @@ -495,8 +495,8 @@ void migration_entry_wait(struct mm_struct *mm, pmd_t *pmd, if (pte_none(pte) || pte_present(pte)) goto out; - entry = pte_to_swp_entry(pte); - if (!is_migration_entry(entry)) + entry = softleaf_from_pte(pte); + if (!softleaf_is_migration(entry)) goto out; migration_entry_wait_on_locked(entry, ptl); diff --git a/mm/migrate_device.c b/mm/migrate_device.c index 880f26a316f8..c50abbd32f21 100644 --- a/mm/migrate_device.c +++ b/mm/migrate_device.c @@ -282,7 +282,7 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp, unsigned long mpfn = 0, pfn; struct folio *folio; struct page *page; - swp_entry_t entry; + softleaf_t entry; pte_t pte; pte = ptep_get(ptep); @@ -301,11 +301,11 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp, * page table entry. Other special swap entries are not * migratable, and we ignore regular swapped page. 
*/ - entry = pte_to_swp_entry(pte); - if (!is_device_private_entry(entry)) + entry = softleaf_from_pte(pte); + if (!softleaf_is_device_private(entry)) goto next; - page = pfn_swap_entry_to_page(entry); + page = softleaf_to_page(entry); pgmap = page_pgmap(page); if (!(migrate->flags & MIGRATE_VMA_SELECT_DEVICE_PRIVATE) || @@ -331,7 +331,7 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp, mpfn = migrate_pfn(page_to_pfn(page)) | MIGRATE_PFN_MIGRATE; - if (is_writable_device_private_entry(entry)) + if (softleaf_is_device_private_write(entry)) mpfn |= MIGRATE_PFN_WRITE; } else { pfn = pte_pfn(pte); diff --git a/mm/mprotect.c b/mm/mprotect.c index ab014ce17f9c..476a29cc89bf 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -317,11 +317,11 @@ static long change_pte_range(struct mmu_gather *tlb, pages++; } } else { - swp_entry_t entry = pte_to_swp_entry(oldpte); + softleaf_t entry = softleaf_from_pte(oldpte); pte_t newpte; - if (is_writable_migration_entry(entry)) { - struct folio *folio = pfn_swap_entry_folio(entry); + if (softleaf_is_migration_write(entry)) { + const struct folio *folio = softleaf_to_folio(entry); /* * A protection check is difficult so @@ -335,7 +335,7 @@ static long change_pte_range(struct mmu_gather *tlb, newpte = swp_entry_to_pte(entry); if (pte_swp_soft_dirty(oldpte)) newpte = pte_swp_mksoft_dirty(newpte); - } else if (is_writable_device_private_entry(entry)) { + } else if (softleaf_is_device_private_write(entry)) { /* * We do not preserve soft-dirtiness. See * copy_nonpresent_pte() for explanation. diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c index 8137d2366722..b38a1d00c971 100644 --- a/mm/page_vma_mapped.c +++ b/mm/page_vma_mapped.c @@ -49,7 +49,7 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw, pmd_t *pmdvalp, if (is_migration) return false; } else if (!is_migration) { - swp_entry_t entry; + softleaf_t entry; /* * Handle un-addressable ZONE_DEVICE memory. @@ -67,9 +67,9 @@ static bool map_pte(struct page_vma_mapped_walk *pvmw, pmd_t *pmdvalp, * For more details on device private memory see HMM * (include/linux/hmm.h or mm/hmm.c). 
*/ - entry = pte_to_swp_entry(ptent); - if (!is_device_private_entry(entry) && - !is_device_exclusive_entry(entry)) + entry = softleaf_from_pte(ptent); + if (!softleaf_is_device_private(entry) && + !softleaf_is_device_exclusive(entry)) return false; } spin_lock(*ptlp); diff --git a/mm/pagewalk.c b/mm/pagewalk.c index 3067feb970d1..d6e29da60d09 100644 --- a/mm/pagewalk.c +++ b/mm/pagewalk.c @@ -1000,11 +1000,10 @@ struct folio *folio_walk_start(struct folio_walk *fw, goto found; } } else if (!pte_none(pte)) { - swp_entry_t entry = pte_to_swp_entry(pte); + const softleaf_t entry = softleaf_from_pte(pte); - if ((flags & FW_MIGRATION) && - is_migration_entry(entry)) { - page = pfn_swap_entry_to_page(entry); + if ((flags & FW_MIGRATION) && softleaf_is_migration(entry)) { + page = softleaf_to_page(entry); expose_page = false; goto found; } diff --git a/mm/rmap.c b/mm/rmap.c index 775710115a41..345466ad396b 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1969,7 +1969,7 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, if (likely(pte_present(pteval))) { pfn = pte_pfn(pteval); } else { - pfn = swp_offset_pfn(pte_to_swp_entry(pteval)); + pfn = softleaf_to_pfn(pte_to_swp_entry(pteval)); VM_WARN_ON_FOLIO(folio_test_hugetlb(folio), folio); } @@ -2368,7 +2368,7 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma, if (likely(pte_present(pteval))) { pfn = pte_pfn(pteval); } else { - pfn = swp_offset_pfn(pte_to_swp_entry(pteval)); + pfn = softleaf_to_pfn(pte_to_swp_entry(pteval)); VM_WARN_ON_FOLIO(folio_test_hugetlb(folio), folio); } @@ -2453,8 +2453,11 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma, folio_mark_dirty(folio); writable = pte_write(pteval); } else { + const softleaf_t entry = softleaf_from_pte(pteval); + pte_clear(mm, address, pvmw.pte); - writable = is_writable_device_private_entry(pte_to_swp_entry(pteval)); + + writable = softleaf_is_device_private_write(entry); } VM_WARN_ON_FOLIO(writable && folio_test_anon(folio) && -- 2.51.0 There are straggler invocations of pte_to_swp_entry() lying around, replace all of these with the software leaf entry equivalent - softleaf_from_pte(). With those removed, eliminate pte_to_swp_entry() altogether. No functional change intended. Signed-off-by: Lorenzo Stoakes --- include/linux/leafops.h | 7 ++++++- include/linux/swapops.h | 13 ------------- mm/debug_vm_pgtable.c | 2 +- mm/internal.h | 7 +++++-- mm/memory-failure.c | 2 +- mm/memory.c | 16 ++++++++-------- mm/migrate.c | 2 +- mm/mincore.c | 4 +++- mm/rmap.c | 8 ++++++-- mm/swapfile.c | 5 +++-- 10 files changed, 34 insertions(+), 32 deletions(-) diff --git a/include/linux/leafops.h b/include/linux/leafops.h index d593093ba70c..a464a7e08c76 100644 --- a/include/linux/leafops.h +++ b/include/linux/leafops.h @@ -54,11 +54,16 @@ static inline softleaf_t softleaf_mk_none(void) */ static inline softleaf_t softleaf_from_pte(pte_t pte) { + softleaf_t arch_entry; + if (pte_present(pte)) return softleaf_mk_none(); + pte = pte_swp_clear_flags(pte); + arch_entry = __pte_to_swp_entry(pte); + /* Temporary until swp_entry_t eliminated. 
*/ - return pte_to_swp_entry(pte); + return swp_entry(__swp_type(arch_entry), __swp_offset(arch_entry)); } /** diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 3d02b288c15e..8cfc966eae48 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -107,19 +107,6 @@ static inline pgoff_t swp_offset(swp_entry_t entry) return entry.val & SWP_OFFSET_MASK; } -/* - * Convert the arch-dependent pte representation of a swp_entry_t into an - * arch-independent swp_entry_t. - */ -static inline swp_entry_t pte_to_swp_entry(pte_t pte) -{ - swp_entry_t arch_entry; - - pte = pte_swp_clear_flags(pte); - arch_entry = __pte_to_swp_entry(pte); - return swp_entry(__swp_type(arch_entry), __swp_offset(arch_entry)); -} - /* * Convert the arch-independent representation of a swp_entry_t into the * arch-dependent pte representation. diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c index 64db85a80558..1eae87dbef73 100644 --- a/mm/debug_vm_pgtable.c +++ b/mm/debug_vm_pgtable.c @@ -1229,7 +1229,7 @@ static int __init init_args(struct pgtable_debug_args *args) init_fixed_pfns(args); /* See generic_max_swapfile_size(): probe the maximum offset */ - max_swap_offset = swp_offset(pte_to_swp_entry(swp_entry_to_pte(swp_entry(0, ~0UL)))); + max_swap_offset = swp_offset(softleaf_from_pte(softleaf_to_pte(swp_entry(0, ~0UL)))); /* Create a swp entry with all possible bits set while still being swap. */ args->swp_entry = swp_entry(MAX_SWAPFILES - 1, max_swap_offset); /* Create a non-present migration entry. */ diff --git a/mm/internal.h b/mm/internal.h index f0c7461bb02c..985605ba3364 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -334,7 +334,7 @@ unsigned int folio_pte_batch(struct folio *folio, pte_t *ptep, pte_t pte, */ static inline pte_t pte_move_swp_offset(pte_t pte, long delta) { - swp_entry_t entry = pte_to_swp_entry(pte); + const softleaf_t entry = softleaf_from_pte(pte); pte_t new = __swp_entry_to_pte(__swp_entry(swp_type(entry), (swp_offset(entry) + delta))); @@ -389,11 +389,14 @@ static inline int swap_pte_batch(pte_t *start_ptep, int max_nr, pte_t pte) cgroup_id = lookup_swap_cgroup_id(entry); while (ptep < end_ptep) { + softleaf_t entry; + pte = ptep_get(ptep); if (!pte_same(pte, expected_pte)) break; - if (lookup_swap_cgroup_id(pte_to_swp_entry(pte)) != cgroup_id) + entry = softleaf_from_pte(pte); + if (lookup_swap_cgroup_id(entry) != cgroup_id) break; expected_pte = pte_next_swp_offset(expected_pte); ptep++; diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 6e79da3de221..ca2204c4647e 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -50,7 +50,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/mm/memory.c b/mm/memory.c index accd275cd651..f9a2c608aff9 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1218,7 +1218,7 @@ copy_pte_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma, spinlock_t *src_ptl, *dst_ptl; int progress, max_nr, ret = 0; int rss[NR_MM_COUNTERS]; - swp_entry_t entry = (swp_entry_t){0}; + softleaf_t entry = softleaf_mk_none(); struct folio *prealloc = NULL; int nr; @@ -1282,7 +1282,7 @@ copy_pte_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma, dst_vma, src_vma, addr, rss); if (ret == -EIO) { - entry = pte_to_swp_entry(ptep_get(src_pte)); + entry = softleaf_from_pte(ptep_get(src_pte)); break; } else if (ret == -EBUSY) { break; @@ -4456,13 +4456,13 @@ static struct folio *__alloc_swap_folio(struct vm_fault *vmf) { struct vm_area_struct *vma = vmf->vma; struct 
folio *folio; - swp_entry_t entry; + softleaf_t entry; folio = vma_alloc_folio(GFP_HIGHUSER_MOVABLE, 0, vma, vmf->address); if (!folio) return NULL; - entry = pte_to_swp_entry(vmf->orig_pte); + entry = softleaf_from_pte(vmf->orig_pte); if (mem_cgroup_swapin_charge_folio(folio, vma->vm_mm, GFP_KERNEL, entry)) { folio_put(folio); @@ -4480,7 +4480,7 @@ static struct folio *__alloc_swap_folio(struct vm_fault *vmf) static bool can_swapin_thp(struct vm_fault *vmf, pte_t *ptep, int nr_pages) { unsigned long addr; - swp_entry_t entry; + softleaf_t entry; int idx; pte_t pte; @@ -4490,7 +4490,7 @@ static bool can_swapin_thp(struct vm_fault *vmf, pte_t *ptep, int nr_pages) if (!pte_same(pte, pte_move_swp_offset(vmf->orig_pte, -idx))) return false; - entry = pte_to_swp_entry(pte); + entry = softleaf_from_pte(pte); if (swap_pte_batch(ptep, nr_pages, pte) != nr_pages) return false; @@ -4536,7 +4536,7 @@ static struct folio *alloc_swap_folio(struct vm_fault *vmf) unsigned long orders; struct folio *folio; unsigned long addr; - swp_entry_t entry; + softleaf_t entry; spinlock_t *ptl; pte_t *pte; gfp_t gfp; @@ -4557,7 +4557,7 @@ static struct folio *alloc_swap_folio(struct vm_fault *vmf) if (!zswap_never_enabled()) goto fallback; - entry = pte_to_swp_entry(vmf->orig_pte); + entry = softleaf_from_pte(vmf->orig_pte); /* * Get a list of all the (large) orders below PMD_ORDER that are enabled * and suitable for swapping THP. diff --git a/mm/migrate.c b/mm/migrate.c index 182a5b7b2ead..c01bc0ddf819 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -534,7 +534,7 @@ void migration_entry_wait_huge(struct vm_area_struct *vma, unsigned long addr, p * lock release in migration_entry_wait_on_locked(). */ hugetlb_vma_unlock_read(vma); - migration_entry_wait_on_locked(pte_to_swp_entry(pte), ptl); + migration_entry_wait_on_locked(entry, ptl); return; } diff --git a/mm/mincore.c b/mm/mincore.c index e1d50f198c42..62c9603a5414 100644 --- a/mm/mincore.c +++ b/mm/mincore.c @@ -202,7 +202,9 @@ static int mincore_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, for (i = 0; i < step; i++) vec[i] = 1; } else { /* pte is a swap entry */ - *vec = mincore_swap(pte_to_swp_entry(pte), false); + const softleaf_t entry = softleaf_from_pte(pte); + + *vec = mincore_swap(entry, false); } vec += step; } diff --git a/mm/rmap.c b/mm/rmap.c index 345466ad396b..d871f2eb821c 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1969,7 +1969,9 @@ static bool try_to_unmap_one(struct folio *folio, struct vm_area_struct *vma, if (likely(pte_present(pteval))) { pfn = pte_pfn(pteval); } else { - pfn = softleaf_to_pfn(pte_to_swp_entry(pteval)); + const softleaf_t entry = softleaf_from_pte(pteval); + + pfn = softleaf_to_pfn(entry); VM_WARN_ON_FOLIO(folio_test_hugetlb(folio), folio); } @@ -2368,7 +2370,9 @@ static bool try_to_migrate_one(struct folio *folio, struct vm_area_struct *vma, if (likely(pte_present(pteval))) { pfn = pte_pfn(pteval); } else { - pfn = softleaf_to_pfn(pte_to_swp_entry(pteval)); + const softleaf_t entry = softleaf_from_pte(pteval); + + pfn = softleaf_to_pfn(entry); VM_WARN_ON_FOLIO(folio_test_hugetlb(folio), folio); } diff --git a/mm/swapfile.c b/mm/swapfile.c index 684f78cd7dd1..1204fb0726d5 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -3201,8 +3201,9 @@ static int claim_swapfile(struct swap_info_struct *si, struct inode *inode) */ unsigned long generic_max_swapfile_size(void) { - return swp_offset(pte_to_swp_entry( - swp_entry_to_pte(swp_entry(0, ~0UL)))) + 1; + const softleaf_t entry = swp_entry(0, ~0UL); + + return 
swp_offset(softleaf_from_pte(softleaf_to_pte(entry))) + 1; } /* Can be overridden by an architecture for additional checks. */ -- 2.51.0
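
For reference, the calling convention these conversions move callers to can be sketched as below. This is an illustrative sketch only and not part of the series: the wrapper function, its name and the include line are assumptions made for the example, while the softleaf_*() helpers are the ones exercised in the diffs above.

#include <linux/leafops.h>

/*
 * Hypothetical example, not code from this series: decode a non-present
 * PTE and return the page backing a migration entry, if any.
 */
static struct page *example_migration_page(pte_t pte)
{
	softleaf_t entry;

	/* Present and empty PTEs carry no software leaf entry. */
	if (pte_present(pte) || pte_none(pte))
		return NULL;

	/* Replaces pte_to_swp_entry(): decode to an arch-independent entry. */
	entry = softleaf_from_pte(pte);

	/* Replaces the old is_migration_entry() check. */
	if (!softleaf_is_migration(entry))
		return NULL;

	/*
	 * Replaces pfn_swap_entry_to_page(); for migration entries the
	 * corresponding page must already be locked by the caller.
	 */
	return softleaf_to_page(entry);
}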