From: Anshuman Khandual Replace READ_ONCE() with standard page table accessors i.e pxdp_get() which anyways default into READ_ONCE() in cases where platform does not override. Also convert ptep_get_lockless() into ptep_get() as well. Link: https://lkml.kernel.org/r/20251001042502.1400726-1-anshuman.khandual@arm.com Signed-off-by: Anshuman Khandual Reviewed-by: Dev Jain Acked-by: Lance Yang Acked-by: SeongJae Park Acked-by: David Hildenbrand Signed-off-by: Andrew Morton Signed-off-by: Samuel Holland --- Changes in v3: - Replace patch with cherry-pick from linux-next Changes in v2: - New patch for v2 (taken from LKML) mm/ptdump.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/mm/ptdump.c b/mm/ptdump.c index b600c7f864b8..973020000096 100644 --- a/mm/ptdump.c +++ b/mm/ptdump.c @@ -31,7 +31,7 @@ static int ptdump_pgd_entry(pgd_t *pgd, unsigned long addr, unsigned long next, struct mm_walk *walk) { struct ptdump_state *st = walk->private; - pgd_t val = READ_ONCE(*pgd); + pgd_t val = pgdp_get(pgd); #if CONFIG_PGTABLE_LEVELS > 4 && \ (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) @@ -54,7 +54,7 @@ static int ptdump_p4d_entry(p4d_t *p4d, unsigned long addr, unsigned long next, struct mm_walk *walk) { struct ptdump_state *st = walk->private; - p4d_t val = READ_ONCE(*p4d); + p4d_t val = p4dp_get(p4d); #if CONFIG_PGTABLE_LEVELS > 3 && \ (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) @@ -77,7 +77,7 @@ static int ptdump_pud_entry(pud_t *pud, unsigned long addr, unsigned long next, struct mm_walk *walk) { struct ptdump_state *st = walk->private; - pud_t val = READ_ONCE(*pud); + pud_t val = pudp_get(pud); #if CONFIG_PGTABLE_LEVELS > 2 && \ (defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)) @@ -100,7 +100,7 @@ static int ptdump_pmd_entry(pmd_t *pmd, unsigned long addr, unsigned long next, struct mm_walk *walk) { struct ptdump_state *st = walk->private; - pmd_t val = READ_ONCE(*pmd); + pmd_t val = pmdp_get(pmd); #if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS) if (pmd_page(val) == virt_to_page(lm_alias(kasan_early_shadow_pte))) @@ -121,7 +121,7 @@ static int ptdump_pte_entry(pte_t *pte, unsigned long addr, unsigned long next, struct mm_walk *walk) { struct ptdump_state *st = walk->private; - pte_t val = ptep_get_lockless(pte); + pte_t val = ptep_get(pte); if (st->effective_prot_pte) st->effective_prot_pte(st, val); -- 2.47.2 From: Anshuman Khandual Replace all READ_ONCE() with a standard page table accessors i.e pxdp_get() that defaults into READ_ONCE() in cases where platform does not override. 
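For reference, the generic fallbacks in include/linux/pgtable.h (moved earlier in that header by a later patch in this series) are plain READ_ONCE() wrappers, so this conversion is functionally a no-op wherever the architecture does not override the accessors, e.g.:

	#ifndef pmdp_get
	static inline pmd_t pmdp_get(pmd_t *pmdp)
	{
		return READ_ONCE(*pmdp);
	}
	#endif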
Link: https://lkml.kernel.org/r/20251007063100.2396936-1-anshuman.khandual@arm.com Signed-off-by: Anshuman Khandual Acked-by: David Hildenbrand Reviewed-by: Lance Yang Reviewed-by: Wei Yang Cc: Dev Jain Signed-off-by: Andrew Morton Signed-off-by: Samuel Holland --- Changes in v3: - New patch for v3 (cherry-picked from linux-next) mm/gup.c | 10 +++++----- mm/hmm.c | 2 +- mm/memory.c | 4 ++-- mm/mprotect.c | 2 +- mm/sparse-vmemmap.c | 2 +- mm/vmscan.c | 2 +- 6 files changed, 11 insertions(+), 11 deletions(-) diff --git a/mm/gup.c b/mm/gup.c index a8ba5112e4d0..b46112d36f7e 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -950,7 +950,7 @@ static struct page *follow_pud_mask(struct vm_area_struct *vma, struct mm_struct *mm = vma->vm_mm; pudp = pud_offset(p4dp, address); - pud = READ_ONCE(*pudp); + pud = pudp_get(pudp); if (!pud_present(pud)) return no_page_table(vma, flags, address); if (pud_leaf(pud)) { @@ -975,7 +975,7 @@ static struct page *follow_p4d_mask(struct vm_area_struct *vma, p4d_t *p4dp, p4d; p4dp = p4d_offset(pgdp, address); - p4d = READ_ONCE(*p4dp); + p4d = p4dp_get(p4dp); BUILD_BUG_ON(p4d_leaf(p4d)); if (!p4d_present(p4d) || p4d_bad(p4d)) @@ -3060,7 +3060,7 @@ static int gup_fast_pud_range(p4d_t *p4dp, p4d_t p4d, unsigned long addr, pudp = pud_offset_lockless(p4dp, p4d, addr); do { - pud_t pud = READ_ONCE(*pudp); + pud_t pud = pudp_get(pudp); next = pud_addr_end(addr, end); if (unlikely(!pud_present(pud))) @@ -3086,7 +3086,7 @@ static int gup_fast_p4d_range(pgd_t *pgdp, pgd_t pgd, unsigned long addr, p4dp = p4d_offset_lockless(pgdp, pgd, addr); do { - p4d_t p4d = READ_ONCE(*p4dp); + p4d_t p4d = p4dp_get(p4dp); next = p4d_addr_end(addr, end); if (!p4d_present(p4d)) @@ -3108,7 +3108,7 @@ static void gup_fast_pgd_range(unsigned long addr, unsigned long end, pgdp = pgd_offset(current->mm, addr); do { - pgd_t pgd = READ_ONCE(*pgdp); + pgd_t pgd = pgdp_get(pgdp); next = pgd_addr_end(addr, end); if (pgd_none(pgd)) diff --git a/mm/hmm.c b/mm/hmm.c index 87562914670a..a56081d67ad6 100644 --- a/mm/hmm.c +++ b/mm/hmm.c @@ -491,7 +491,7 @@ static int hmm_vma_walk_pud(pud_t *pudp, unsigned long start, unsigned long end, /* Normally we don't want to split the huge page */ walk->action = ACTION_CONTINUE; - pud = READ_ONCE(*pudp); + pud = pudp_get(pudp); if (!pud_present(pud)) { spin_unlock(ptl); return hmm_vma_walk_hole(start, end, -1, walk); diff --git a/mm/memory.c b/mm/memory.c index b59ae7ce42eb..0c295e2fe8e8 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -6690,12 +6690,12 @@ int follow_pfnmap_start(struct follow_pfnmap_args *args) goto out; p4dp = p4d_offset(pgdp, address); - p4d = READ_ONCE(*p4dp); + p4d = p4dp_get(p4dp); if (p4d_none(p4d) || unlikely(p4d_bad(p4d))) goto out; pudp = pud_offset(p4dp, address); - pud = READ_ONCE(*pudp); + pud = pudp_get(pudp); if (pud_none(pud)) goto out; if (pud_leaf(pud)) { diff --git a/mm/mprotect.c b/mm/mprotect.c index 113b48985834..988c366137d5 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -599,7 +599,7 @@ static inline long change_pud_range(struct mmu_gather *tlb, break; } - pud = READ_ONCE(*pudp); + pud = pudp_get(pudp); if (pud_none(pud)) continue; diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index dbd8daccade2..37522d6cb398 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -439,7 +439,7 @@ int __meminit vmemmap_populate_hugepages(unsigned long start, unsigned long end, return -ENOMEM; pmd = pmd_offset(pud, addr); - if (pmd_none(READ_ONCE(*pmd))) { + if (pmd_none(pmdp_get(pmd))) { void *p; p = vmemmap_alloc_block_buf(PMD_SIZE, 
node, altmap); diff --git a/mm/vmscan.c b/mm/vmscan.c index b2fc8b626d3d..2239de111fa6 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -3773,7 +3773,7 @@ static int walk_pud_range(p4d_t *p4d, unsigned long start, unsigned long end, pud = pud_offset(p4d, start & P4D_MASK); restart: for (i = pud_index(start), addr = start; addr != end; i++, addr = next) { - pud_t val = READ_ONCE(pud[i]); + pud_t val = pudp_get(pud + i); next = pud_addr_end(addr, end); -- 2.47.2 From: Anshuman Khandual Replace READ_ONCE() with a standard page table accessor i.e pudp_get() that anyways defaults into READ_ONCE() in cases where platform does not override Link: https://lkml.kernel.org/r/20251006055214.1845342-1-anshuman.khandual@arm.com Signed-off-by: Anshuman Khandual Acked-by: David Hildenbrand Reviewed-by: Dev Jain Reviewed-by: Oscar Salvador Cc: Lance Yang Signed-off-by: Andrew Morton Signed-off-by: Samuel Holland --- Changes in v3: - New patch for v3 (cherry-picked from linux-next) mm/mapping_dirty_helpers.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/mm/mapping_dirty_helpers.c b/mm/mapping_dirty_helpers.c index c193de6cb23a..737c407f4081 100644 --- a/mm/mapping_dirty_helpers.c +++ b/mm/mapping_dirty_helpers.c @@ -149,7 +149,7 @@ static int wp_clean_pud_entry(pud_t *pud, unsigned long addr, unsigned long end, struct mm_walk *walk) { #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD - pud_t pudval = READ_ONCE(*pud); + pud_t pudval = pudp_get(pud); /* Do not split a huge pud */ if (pud_trans_huge(pudval)) { -- 2.47.2 From: Anshuman Khandual Replace READ_ONCE() with standard page table accessors i.e pxdp_get() which anyways default into READ_ONCE() in cases where platform does not override. Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Namhyung Kim Cc: linux-perf-users@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Anshuman Khandual Link: https://lore.kernel.org/r/20251006042622.1743675-1-anshuman.khandual@arm.com/ Signed-off-by: Samuel Holland --- Changes in v3: - Replace my patch with Anshuman Khandual's patch from LKML Changes in v2: - New patch for v2 kernel/events/core.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index 1fd347da9026..fa4f9165bd94 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -8122,7 +8122,7 @@ static u64 perf_get_pgtable_size(struct mm_struct *mm, unsigned long addr) pte_t *ptep, pte; pgdp = pgd_offset(mm, addr); - pgd = READ_ONCE(*pgdp); + pgd = pgdp_get(pgdp); if (pgd_none(pgd)) return 0; @@ -8130,7 +8130,7 @@ static u64 perf_get_pgtable_size(struct mm_struct *mm, unsigned long addr) return pgd_leaf_size(pgd); p4dp = p4d_offset_lockless(pgdp, pgd, addr); - p4d = READ_ONCE(*p4dp); + p4d = p4dp_get(p4dp); if (!p4d_present(p4d)) return 0; @@ -8138,7 +8138,7 @@ static u64 perf_get_pgtable_size(struct mm_struct *mm, unsigned long addr) return p4d_leaf_size(p4d); pudp = pud_offset_lockless(p4dp, p4d, addr); - pud = READ_ONCE(*pudp); + pud = pudp_get(pudp); if (!pud_present(pud)) return 0; -- 2.47.2 Some platforms need to fix up the values when reading or writing page tables. Because of this, the accessors must always be used; it is not valid to simply dereference a pXX_t pointer. Move these definitions up by a few lines, so they will be in scope everywhere that currently dereferences a pXX_t pointer. 
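As an illustration of why the accessor form matters, an architecture override could look roughly like the sketch below. This is hypothetical: arch_fixup_pud() is a placeholder name standing in for whatever per-platform transformation is needed (for example, expanding a contiguous-mapping encoding into a plain entry). Any caller that dereferences the pXX_t pointer directly bypasses this fixup, which is why the accessors must be used everywhere:

	/* Hypothetical arch override; the generic fallback is just READ_ONCE(). */
	#define pudp_get pudp_get
	static inline pud_t pudp_get(pud_t *pudp)
	{
		pud_t pud = READ_ONCE(*pudp);

		/*
		 * arch_fixup_pud() is a placeholder for the platform-specific
		 * adjustment applied to the raw page table entry.
		 */
		return arch_fixup_pud(pud);
	}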
Signed-off-by: Samuel Holland --- (no changes since v2) Changes in v2: - New patch for v2 include/linux/pgtable.h | 70 ++++++++++++++++++++--------------------- 1 file changed, 35 insertions(+), 35 deletions(-) diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 32e8457ad535..ca8c99cdc1cc 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -90,6 +90,41 @@ static inline unsigned long pud_index(unsigned long address) #define pgd_index(a) (((a) >> PGDIR_SHIFT) & (PTRS_PER_PGD - 1)) #endif +#ifndef ptep_get +static inline pte_t ptep_get(pte_t *ptep) +{ + return READ_ONCE(*ptep); +} +#endif + +#ifndef pmdp_get +static inline pmd_t pmdp_get(pmd_t *pmdp) +{ + return READ_ONCE(*pmdp); +} +#endif + +#ifndef pudp_get +static inline pud_t pudp_get(pud_t *pudp) +{ + return READ_ONCE(*pudp); +} +#endif + +#ifndef p4dp_get +static inline p4d_t p4dp_get(p4d_t *p4dp) +{ + return READ_ONCE(*p4dp); +} +#endif + +#ifndef pgdp_get +static inline pgd_t pgdp_get(pgd_t *pgdp) +{ + return READ_ONCE(*pgdp); +} +#endif + #ifndef kernel_pte_init static inline void kernel_pte_init(void *addr) { @@ -334,41 +369,6 @@ static inline int pudp_set_access_flags(struct vm_area_struct *vma, #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif -#ifndef ptep_get -static inline pte_t ptep_get(pte_t *ptep) -{ - return READ_ONCE(*ptep); -} -#endif - -#ifndef pmdp_get -static inline pmd_t pmdp_get(pmd_t *pmdp) -{ - return READ_ONCE(*pmdp); -} -#endif - -#ifndef pudp_get -static inline pud_t pudp_get(pud_t *pudp) -{ - return READ_ONCE(*pudp); -} -#endif - -#ifndef p4dp_get -static inline p4d_t p4dp_get(p4d_t *p4dp) -{ - return READ_ONCE(*p4dp); -} -#endif - -#ifndef pgdp_get -static inline pgd_t pgdp_get(pgd_t *pgdp) -{ - return READ_ONCE(*pgdp); -} -#endif - #ifndef __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG static inline int ptep_test_and_clear_young(struct vm_area_struct *vma, unsigned long address, -- 2.47.2 Some platforms need to fix up the values when reading or writing page tables. Because of this, the accessors must always be used; it is not valid to simply dereference a pXX_t pointer. Fix all of the instances of this pattern in generic code, mostly by applying the below coccinelle semantic patch, repeated for each page table level. Some additional fixes were applied manually, mostly to macros where type information is unavailable. In a few places, a `pte_t *` or `pmd_t *` is actually a pointer to a PTE or PMDE value stored on the stack, not a pointer to a page table. In those cases, it is not appropriate to use the accessors, because the value is not globally visible, and any transformation from pXXp_get() has already been applied. Those places are marked by naming the pointer `ptentp` or `pmdvalp`, as opposed to `ptep` or `pmdp`. @@ pte_t *P; expression E; expression I; @@ - P[I] = E + set_pte(P + I, E) @@ pte_t *P; expression E; @@ ( - WRITE_ONCE(*P, E) + set_pte(P, E) | - *P = E + set_pte(P, E) ) @@ pte_t *P; expression I; @@ ( &P[I] | - READ_ONCE(P[I]) + ptep_get(P + I) | - P[I] + ptep_get(P + I) ) @@ pte_t *P; @@ ( - READ_ONCE(*P) + ptep_get(P) | - *P + ptep_get(P) ) Additionally, the following semantic patch was used to convert PMD and PUD references inside struct vm_fault: @@ struct vm_fault vmf; @@ ( - *vmf.pmd + pmdp_get(vmf.pmd) | - *vmf.pud + pudp_get(vmf.pud) ) @@ struct vm_fault *vmf; @@ ( - *vmf->pmd + pmdp_get(vmf->pmd) | - *vmf->pud + pudp_get(vmf->pud) ) Signed-off-by: Samuel Holland --- This commit covers some of the same changes as an existing series from Anshuman Khandual[1]. 
Unlike that series, this commit is a purely mechanical conversion to demonstrate the RISC-V changes, so it does not insert local variables to avoid redundant calls to the accessors. A manual conversion like in that series could improve performance. [1]: https://lore.kernel.org/linux-mm/20240917073117.1531207-1-anshuman.khandual@arm.com/ Changes in v3: - Rebased on top of torvalds/master (v6.18-rc5+) Changes in v2: - New patch for v2 fs/dax.c | 4 +- fs/proc/task_mmu.c | 27 +++++++------ fs/userfaultfd.c | 6 +-- include/linux/huge_mm.h | 8 ++-- include/linux/mm.h | 14 +++---- include/linux/pgtable.h | 42 +++++++++---------- mm/damon/vaddr.c | 2 +- mm/debug_vm_pgtable.c | 4 +- mm/filemap.c | 6 +-- mm/gup.c | 24 +++++------ mm/huge_memory.c | 90 ++++++++++++++++++++--------------------- mm/hugetlb.c | 10 ++--- mm/hugetlb_vmemmap.c | 4 +- mm/kasan/init.c | 39 +++++++++--------- mm/kasan/shadow.c | 12 +++--- mm/khugepaged.c | 4 +- mm/ksm.c | 2 +- mm/madvise.c | 8 ++-- mm/memory-failure.c | 14 +++---- mm/memory.c | 76 +++++++++++++++++----------------- mm/mempolicy.c | 4 +- mm/migrate.c | 4 +- mm/migrate_device.c | 10 ++--- mm/mlock.c | 6 +-- mm/mprotect.c | 2 +- mm/mremap.c | 30 +++++++------- mm/page_table_check.c | 4 +- mm/page_vma_mapped.c | 6 +-- mm/pagewalk.c | 14 +++---- mm/percpu.c | 8 ++-- mm/pgalloc-track.h | 8 ++-- mm/pgtable-generic.c | 23 ++++++----- mm/rmap.c | 8 ++-- mm/sparse-vmemmap.c | 8 ++-- mm/userfaultfd.c | 10 ++--- mm/vmalloc.c | 49 +++++++++++----------- mm/vmscan.c | 14 +++---- 37 files changed, 304 insertions(+), 300 deletions(-) diff --git a/fs/dax.c b/fs/dax.c index 516f995a988c..e09a80ee44a0 100644 --- a/fs/dax.c +++ b/fs/dax.c @@ -1900,7 +1900,7 @@ static vm_fault_t dax_iomap_pte_fault(struct vm_fault *vmf, unsigned long *pfnp, * the PTE we need to set up. If so just return and the fault will be * retried. */ - if (pmd_trans_huge(*vmf->pmd)) { + if (pmd_trans_huge(pmdp_get(vmf->pmd))) { ret = VM_FAULT_NOPAGE; goto unlock_entry; } @@ -2023,7 +2023,7 @@ static vm_fault_t dax_iomap_pmd_fault(struct vm_fault *vmf, unsigned long *pfnp, * the PMD we need to set up. If so just return and the fault will be * retried. 
*/ - if (!pmd_none(*vmf->pmd) && !pmd_trans_huge(*vmf->pmd)) { + if (!pmd_none(pmdp_get(vmf->pmd)) && !pmd_trans_huge(pmdp_get(vmf->pmd))) { ret = 0; goto unlock_entry; } diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index fc35a0543f01..4f80704b78af 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1060,11 +1060,11 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, bool present = false; struct folio *folio; - if (pmd_present(*pmd)) { - page = vm_normal_page_pmd(vma, addr, *pmd); + if (pmd_present(pmdp_get(pmd))) { + page = vm_normal_page_pmd(vma, addr, pmdp_get(pmd)); present = true; - } else if (unlikely(thp_migration_supported() && is_swap_pmd(*pmd))) { - swp_entry_t entry = pmd_to_swp_entry(*pmd); + } else if (unlikely(thp_migration_supported() && is_swap_pmd(pmdp_get(pmd)))) { + swp_entry_t entry = pmd_to_swp_entry(pmdp_get(pmd)); if (is_pfn_swap_entry(entry)) page = pfn_swap_entry_to_page(entry); @@ -1081,7 +1081,8 @@ static void smaps_pmd_entry(pmd_t *pmd, unsigned long addr, else mss->file_thp += HPAGE_PMD_SIZE; - smaps_account(mss, page, true, pmd_young(*pmd), pmd_dirty(*pmd), + smaps_account(mss, page, true, pmd_young(pmdp_get(pmd)), + pmd_dirty(pmdp_get(pmd)), locked, present); } #else @@ -1636,7 +1637,7 @@ static inline void clear_soft_dirty(struct vm_area_struct *vma, static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmdp) { - pmd_t old, pmd = *pmdp; + pmd_t old, pmd = pmdp_get(pmdp); if (pmd_present(pmd)) { /* See comment in change_huge_pmd() */ @@ -1678,10 +1679,10 @@ static int clear_refs_pte_range(pmd_t *pmd, unsigned long addr, goto out; } - if (!pmd_present(*pmd)) + if (!pmd_present(pmdp_get(pmd))) goto out; - folio = pmd_folio(*pmd); + folio = pmd_folio(pmdp_get(pmd)); /* Clear accessed and referenced bits. 
*/ pmdp_test_and_clear_young(vma, addr, pmd); @@ -1989,7 +1990,7 @@ static int pagemap_pmd_range(pmd_t *pmdp, unsigned long addr, unsigned long end, if (ptl) { unsigned int idx = (addr & ~PMD_MASK) >> PAGE_SHIFT; u64 flags = 0, frame = 0; - pmd_t pmd = *pmdp; + pmd_t pmd = pmdp_get(pmdp); struct page *page = NULL; struct folio *folio = NULL; @@ -2416,7 +2417,7 @@ static unsigned long pagemap_thp_category(struct pagemap_scan_private *p, static void make_uffd_wp_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmdp) { - pmd_t old, pmd = *pmdp; + pmd_t old, pmd = pmdp_get(pmdp); if (pmd_present(pmd)) { old = pmdp_invalidate_ad(vma, addr, pmdp); @@ -2646,7 +2647,7 @@ static int pagemap_scan_thp_entry(pmd_t *pmd, unsigned long start, return -ENOENT; categories = p->cur_vma_category | - pagemap_thp_category(p, vma, start, *pmd); + pagemap_thp_category(p, vma, start, pmdp_get(pmd)); if (!pagemap_scan_is_interesting_page(categories, p)) goto out_unlock; @@ -3181,9 +3182,9 @@ static int gather_pte_stats(pmd_t *pmd, unsigned long addr, if (ptl) { struct page *page; - page = can_gather_numa_stats_pmd(*pmd, vma, addr); + page = can_gather_numa_stats_pmd(pmdp_get(pmd), vma, addr); if (page) - gather_stats(page, md, pmd_dirty(*pmd), + gather_stats(page, md, pmd_dirty(pmdp_get(pmd)), HPAGE_PMD_SIZE/PAGE_SIZE); spin_unlock(ptl); return 0; diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 54c6cc7fe9c6..2e2a6b326c2f 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -289,13 +289,13 @@ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx, assert_fault_locked(vmf); pgd = pgd_offset(mm, address); - if (!pgd_present(*pgd)) + if (!pgd_present(pgdp_get(pgd))) goto out; p4d = p4d_offset(pgd, address); - if (!p4d_present(*p4d)) + if (!p4d_present(p4dp_get(p4d))) goto out; pud = pud_offset(p4d, address); - if (!pud_present(*pud)) + if (!pud_present(pudp_get(pud))) goto out; pmd = pmd_offset(pud, address); again: diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 71ac78b9f834..d2840221e7cd 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -409,7 +409,7 @@ void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, #define split_huge_pmd(__vma, __pmd, __address) \ do { \ pmd_t *____pmd = (__pmd); \ - if (is_swap_pmd(*____pmd) || pmd_trans_huge(*____pmd)) \ + if (is_swap_pmd(pmdp_get(____pmd)) || pmd_trans_huge(pmdp_get(____pmd))) \ __split_huge_pmd(__vma, __pmd, __address, \ false); \ } while (0) @@ -434,7 +434,7 @@ change_huge_pud(struct mmu_gather *tlb, struct vm_area_struct *vma, #define split_huge_pud(__vma, __pud, __address) \ do { \ pud_t *____pud = (__pud); \ - if (pud_trans_huge(*____pud)) \ + if (pud_trans_huge(pudp_get(____pud))) \ __split_huge_pud(__vma, __pud, __address); \ } while (0) @@ -456,7 +456,7 @@ static inline int is_swap_pmd(pmd_t pmd) static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma) { - if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd)) + if (is_swap_pmd(pmdp_get(pmd)) || pmd_trans_huge(pmdp_get(pmd))) return __pmd_trans_huge_lock(pmd, vma); else return NULL; @@ -464,7 +464,7 @@ static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd, static inline spinlock_t *pud_trans_huge_lock(pud_t *pud, struct vm_area_struct *vma) { - if (pud_trans_huge(*pud)) + if (pud_trans_huge(pudp_get(pud))) return __pud_trans_huge_lock(pud, vma); else return NULL; diff --git a/include/linux/mm.h b/include/linux/mm.h index d16b33bacc32..fdc333384190 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ 
-2921,20 +2921,20 @@ int __pte_alloc_kernel(pmd_t *pmd); static inline p4d_t *p4d_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) { - return (unlikely(pgd_none(*pgd)) && __p4d_alloc(mm, pgd, address)) ? + return (unlikely(pgd_none(pgdp_get(pgd))) && __p4d_alloc(mm, pgd, address)) ? NULL : p4d_offset(pgd, address); } static inline pud_t *pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address) { - return (unlikely(p4d_none(*p4d)) && __pud_alloc(mm, p4d, address)) ? + return (unlikely(p4d_none(p4dp_get(p4d))) && __pud_alloc(mm, p4d, address)) ? NULL : pud_offset(p4d, address); } static inline pmd_t *pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) { - return (unlikely(pud_none(*pud)) && __pmd_alloc(mm, pud, address))? + return (unlikely(pud_none(pudp_get(pud))) && __pmd_alloc(mm, pud, address)) ? NULL: pmd_offset(pud, address); } #endif /* CONFIG_MMU */ @@ -3027,9 +3027,9 @@ static inline spinlock_t *ptlock_ptr(struct ptdesc *ptdesc) } #endif /* ALLOC_SPLIT_PTLOCKS */ -static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmd) +static inline spinlock_t *pte_lockptr(struct mm_struct *mm, pmd_t *pmdvalp) { - return ptlock_ptr(page_ptdesc(pmd_page(*pmd))); + return ptlock_ptr(page_ptdesc(pmd_page(*pmdvalp))); } static inline spinlock_t *ptep_lockptr(struct mm_struct *mm, pte_t *pte) @@ -3146,7 +3146,7 @@ pte_t *pte_offset_map_rw_nolock(struct mm_struct *mm, pmd_t *pmd, pte_unmap(pte); \ } while (0) -#define pte_alloc(mm, pmd) (unlikely(pmd_none(*(pmd))) && __pte_alloc(mm, pmd)) +#define pte_alloc(mm, pmd) (unlikely(pmd_none(pmdp_get(pmd))) && __pte_alloc(mm, pmd)) #define pte_alloc_map(mm, pmd, address) \ (pte_alloc(mm, pmd) ? NULL : pte_offset_map(pmd, address)) @@ -3156,7 +3156,7 @@ pte_t *pte_offset_map_rw_nolock(struct mm_struct *mm, pmd_t *pmd, NULL : pte_offset_map_lock(mm, pmd, address, ptlp)) #define pte_alloc_kernel(pmd, address) \ - ((unlikely(pmd_none(*(pmd))) && __pte_alloc_kernel(pmd))? \ + ((unlikely(pmd_none(pmdp_get(pmd))) && __pte_alloc_kernel(pmd)) ? 
\ NULL: pte_offset_kernel(pmd, address)) #if defined(CONFIG_SPLIT_PMD_PTLOCKS) diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index ca8c99cdc1cc..7ebb884fb328 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -149,14 +149,14 @@ static inline void pud_init(void *addr) #ifndef pte_offset_kernel static inline pte_t *pte_offset_kernel(pmd_t *pmd, unsigned long address) { - return (pte_t *)pmd_page_vaddr(*pmd) + pte_index(address); + return (pte_t *)pmd_page_vaddr(pmdp_get(pmd)) + pte_index(address); } #define pte_offset_kernel pte_offset_kernel #endif #ifdef CONFIG_HIGHPTE #define __pte_map(pmd, address) \ - ((pte_t *)kmap_local_page(pmd_page(*(pmd))) + pte_index((address))) + ((pte_t *)kmap_local_page(pmd_page(pmdp_get(pmd))) + pte_index((address))) #define pte_unmap(pte) do { \ kunmap_local((pte)); \ rcu_read_unlock(); \ @@ -178,7 +178,7 @@ void pte_free_defer(struct mm_struct *mm, pgtable_t pgtable); #ifndef pmd_offset static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) { - return pud_pgtable(*pud) + pmd_index(address); + return pud_pgtable(pudp_get(pud)) + pmd_index(address); } #define pmd_offset pmd_offset #endif @@ -186,7 +186,7 @@ static inline pmd_t *pmd_offset(pud_t *pud, unsigned long address) #ifndef pud_offset static inline pud_t *pud_offset(p4d_t *p4d, unsigned long address) { - return p4d_pgtable(*p4d) + pud_index(address); + return p4d_pgtable(p4dp_get(p4d)) + pud_index(address); } #define pud_offset pud_offset #endif @@ -230,7 +230,7 @@ static inline pte_t *virt_to_kpte(unsigned long vaddr) { pmd_t *pmd = pmd_off_k(vaddr); - return pmd_none(*pmd) ? NULL : pte_offset_kernel(pmd, vaddr); + return pmd_none(pmdp_get(pmd)) ? NULL : pte_offset_kernel(pmd, vaddr); } #ifndef pmd_young @@ -390,7 +390,7 @@ static inline int pmdp_test_and_clear_young(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { - pmd_t pmd = *pmdp; + pmd_t pmd = pmdp_get(pmdp); int r = 1; if (!pmd_young(pmd)) r = 0; @@ -645,7 +645,7 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long address, pmd_t *pmdp) { - pmd_t pmd = *pmdp; + pmd_t pmd = pmdp_get(pmdp); pmd_clear(pmdp); page_table_check_pmd_clear(mm, pmd); @@ -658,7 +658,7 @@ static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm, unsigned long address, pud_t *pudp) { - pud_t pud = *pudp; + pud_t pud = pudp_get(pudp); pud_clear(pudp); page_table_check_pud_clear(mm, pud); @@ -968,7 +968,7 @@ static inline pte_t pte_sw_mkyoung(pte_t pte) static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long address, pmd_t *pmdp) { - pmd_t old_pmd = *pmdp; + pmd_t old_pmd = pmdp_get(pmdp); set_pmd_at(mm, address, pmdp, pmd_wrprotect(old_pmd)); } #else @@ -985,7 +985,7 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, static inline void pudp_set_wrprotect(struct mm_struct *mm, unsigned long address, pud_t *pudp) { - pud_t old_pud = *pudp; + pud_t old_pud = pudp_get(pudp); set_pud_at(mm, address, pudp, pud_wrprotect(old_pud)); } @@ -1009,7 +1009,7 @@ static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, pmd_t *pmdp) { BUILD_BUG(); - return *pmdp; + return pmdp_get(pmdp); } #define pmdp_collapse_flush pmdp_collapse_flush #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ @@ -1037,7 +1037,7 @@ extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); static inline pmd_t generic_pmdp_establish(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp, pmd_t pmd) { - pmd_t old_pmd = *pmdp; + pmd_t old_pmd = 
pmdp_get(pmdp); set_pmd_at(vma->vm_mm, address, pmdp, pmd); return old_pmd; } @@ -1287,9 +1287,9 @@ void pmd_clear_bad(pmd_t *); static inline int pgd_none_or_clear_bad(pgd_t *pgd) { - if (pgd_none(*pgd)) + if (pgd_none(pgdp_get(pgd))) return 1; - if (unlikely(pgd_bad(*pgd))) { + if (unlikely(pgd_bad(pgdp_get(pgd)))) { pgd_clear_bad(pgd); return 1; } @@ -1298,9 +1298,9 @@ static inline int pgd_none_or_clear_bad(pgd_t *pgd) static inline int p4d_none_or_clear_bad(p4d_t *p4d) { - if (p4d_none(*p4d)) + if (p4d_none(p4dp_get(p4d))) return 1; - if (unlikely(p4d_bad(*p4d))) { + if (unlikely(p4d_bad(p4dp_get(p4d)))) { p4d_clear_bad(p4d); return 1; } @@ -1309,9 +1309,9 @@ static inline int p4d_none_or_clear_bad(p4d_t *p4d) static inline int pud_none_or_clear_bad(pud_t *pud) { - if (pud_none(*pud)) + if (pud_none(pudp_get(pud))) return 1; - if (unlikely(pud_bad(*pud))) { + if (unlikely(pud_bad(pudp_get(pud)))) { pud_clear_bad(pud); return 1; } @@ -1320,9 +1320,9 @@ static inline int pud_none_or_clear_bad(pud_t *pud) static inline int pmd_none_or_clear_bad(pmd_t *pmd) { - if (pmd_none(*pmd)) + if (pmd_none(pmdp_get(pmd))) return 1; - if (unlikely(pmd_bad(*pmd))) { + if (unlikely(pmd_bad(pmdp_get(pmd)))) { pmd_clear_bad(pmd); return 1; } @@ -1798,7 +1798,7 @@ static inline int pud_trans_unstable(pud_t *pud) { #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && \ defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) - pud_t pudval = READ_ONCE(*pud); + pud_t pudval = pudp_get(pud); if (pud_none(pudval) || pud_trans_huge(pudval)) return 1; diff --git a/mm/damon/vaddr.c b/mm/damon/vaddr.c index 7e834467b2d8..b750cbe56bc6 100644 --- a/mm/damon/vaddr.c +++ b/mm/damon/vaddr.c @@ -910,7 +910,7 @@ static int damos_va_stat_pmd_entry(pmd_t *pmd, unsigned long addr, int nr; #ifdef CONFIG_TRANSPARENT_HUGEPAGE - if (pmd_trans_huge(*pmd)) { + if (pmd_trans_huge(pmdp_get(pmd))) { pmd_t pmde; ptl = pmd_trans_huge_lock(pmd, vma); diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c index 830107b6dd08..fb5596e2e426 100644 --- a/mm/debug_vm_pgtable.c +++ b/mm/debug_vm_pgtable.c @@ -431,7 +431,7 @@ static void __init pmd_huge_tests(struct pgtable_debug_args *args) * X86 defined pmd_set_huge() verifies that the given * PMD is not a populated non-leaf entry. */ - WRITE_ONCE(*args->pmdp, __pmd(0)); + set_pmd(args->pmdp, __pmd(0)); WARN_ON(!pmd_set_huge(args->pmdp, __pfn_to_phys(args->fixed_pmd_pfn), args->page_prot)); WARN_ON(!pmd_clear_huge(args->pmdp)); pmd = pmdp_get(args->pmdp); @@ -451,7 +451,7 @@ static void __init pud_huge_tests(struct pgtable_debug_args *args) * X86 defined pud_set_huge() verifies that the given * PUD is not a populated non-leaf entry. */ - WRITE_ONCE(*args->pudp, __pud(0)); + set_pud(args->pudp, __pud(0)); WARN_ON(!pud_set_huge(args->pudp, __pfn_to_phys(args->fixed_pud_pfn), args->page_prot)); WARN_ON(!pud_clear_huge(args->pudp)); pud = pudp_get(args->pudp); diff --git a/mm/filemap.c b/mm/filemap.c index 2f1e7e283a51..76027cf534c9 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -3611,13 +3611,13 @@ static bool filemap_map_pmd(struct vm_fault *vmf, struct folio *folio, struct mm_struct *mm = vmf->vma->vm_mm; /* Huge page is mapped? No need to proceed. 
*/ - if (pmd_trans_huge(*vmf->pmd)) { + if (pmd_trans_huge(pmdp_get(vmf->pmd))) { folio_unlock(folio); folio_put(folio); return true; } - if (pmd_none(*vmf->pmd) && folio_test_pmd_mappable(folio)) { + if (pmd_none(pmdp_get(vmf->pmd)) && folio_test_pmd_mappable(folio)) { struct page *page = folio_file_page(folio, start); vm_fault_t ret = do_set_pmd(vmf, folio, page); if (!ret) { @@ -3627,7 +3627,7 @@ static bool filemap_map_pmd(struct vm_fault *vmf, struct folio *folio, } } - if (pmd_none(*vmf->pmd) && vmf->prealloc_pte) + if (pmd_none(pmdp_get(vmf->pmd)) && vmf->prealloc_pte) pmd_install(mm, vmf->pmd, &vmf->prealloc_pte); return false; diff --git a/mm/gup.c b/mm/gup.c index b46112d36f7e..549f9e868311 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -652,7 +652,7 @@ static struct page *follow_huge_pud(struct vm_area_struct *vma, { struct mm_struct *mm = vma->vm_mm; struct page *page; - pud_t pud = *pudp; + pud_t pud = pudp_get(pudp); unsigned long pfn = pud_pfn(pud); int ret; @@ -704,7 +704,7 @@ static struct page *follow_huge_pmd(struct vm_area_struct *vma, unsigned long *page_mask) { struct mm_struct *mm = vma->vm_mm; - pmd_t pmdval = *pmd; + pmd_t pmdval = pmdp_get(pmd); struct page *page; int ret; @@ -719,7 +719,7 @@ static struct page *follow_huge_pmd(struct vm_area_struct *vma, if ((flags & FOLL_DUMP) && is_huge_zero_pmd(pmdval)) return ERR_PTR(-EFAULT); - if (pmd_protnone(*pmd) && !gup_can_follow_protnone(vma, flags)) + if (pmd_protnone(pmdp_get(pmd)) && !gup_can_follow_protnone(vma, flags)) return NULL; if (!pmd_write(pmdval) && gup_must_unshare(vma, flags, page)) @@ -918,7 +918,7 @@ static struct page *follow_pmd_mask(struct vm_area_struct *vma, return no_page_table(vma, flags, address); ptl = pmd_lock(mm, pmd); - pmdval = *pmd; + pmdval = pmdp_get(pmd); if (unlikely(!pmd_present(pmdval))) { spin_unlock(ptl); return no_page_table(vma, flags, address); @@ -1017,7 +1017,7 @@ static struct page *follow_page_mask(struct vm_area_struct *vma, *page_mask = 0; pgd = pgd_offset(mm, address); - if (pgd_none(*pgd) || unlikely(pgd_bad(*pgd))) + if (pgd_none(pgdp_get(pgd)) || unlikely(pgd_bad(pgdp_get(pgd)))) page = no_page_table(vma, flags, address); else page = follow_p4d_mask(vma, address, pgd, flags, page_mask); @@ -1043,16 +1043,16 @@ static int get_gate_page(struct mm_struct *mm, unsigned long address, if (gup_flags & FOLL_WRITE) return -EFAULT; pgd = pgd_offset(mm, address); - if (pgd_none(*pgd)) + if (pgd_none(pgdp_get(pgd))) return -EFAULT; p4d = p4d_offset(pgd, address); - if (p4d_none(*p4d)) + if (p4d_none(p4dp_get(p4d))) return -EFAULT; pud = pud_offset(p4d, address); - if (pud_none(*pud)) + if (pud_none(pudp_get(pud))) return -EFAULT; pmd = pmd_offset(pud, address); - if (!pmd_present(*pmd)) + if (!pmd_present(pmdp_get(pmd))) return -EFAULT; pte = pte_offset_map(pmd, address); if (!pte) @@ -2876,7 +2876,7 @@ static int gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr, if (!folio) goto pte_unmap; - if (unlikely(pmd_val(pmd) != pmd_val(*pmdp)) || + if (unlikely(pmd_val(pmd) != pmd_val(pmdp_get(pmdp))) || unlikely(pte_val(pte) != pte_val(ptep_get(ptep)))) { gup_put_folio(folio, 1, flags); goto pte_unmap; @@ -2953,7 +2953,7 @@ static int gup_fast_pmd_leaf(pmd_t orig, pmd_t *pmdp, unsigned long addr, if (!folio) return 0; - if (unlikely(pmd_val(orig) != pmd_val(*pmdp))) { + if (unlikely(pmd_val(orig) != pmd_val(pmdp_get(pmdp)))) { gup_put_folio(folio, refs, flags); return 0; } @@ -2996,7 +2996,7 @@ static int gup_fast_pud_leaf(pud_t orig, pud_t *pudp, unsigned long addr, if (!folio) 
return 0; - if (unlikely(pud_val(orig) != pud_val(*pudp))) { + if (unlikely(pud_val(orig) != pud_val(pudp_get(pudp)))) { gup_put_folio(folio, refs, flags); return 0; } diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 323654fb4f8c..cee70fdbe475 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1254,7 +1254,7 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf) } vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); - if (unlikely(!pmd_none(*vmf->pmd))) { + if (unlikely(!pmd_none(pmdp_get(vmf->pmd)))) { goto unlock_release; } else { ret = check_stable_address_space(vma->vm_mm); @@ -1367,7 +1367,7 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf) } vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); ret = 0; - if (pmd_none(*vmf->pmd)) { + if (pmd_none(pmdp_get(vmf->pmd))) { ret = check_stable_address_space(vma->vm_mm); if (ret) { spin_unlock(vmf->ptl); @@ -1420,16 +1420,16 @@ static vm_fault_t insert_pmd(struct vm_area_struct *vma, unsigned long addr, } ptl = pmd_lock(mm, pmd); - if (!pmd_none(*pmd)) { + if (!pmd_none(pmdp_get(pmd))) { const unsigned long pfn = fop.is_folio ? folio_pfn(fop.folio) : fop.pfn; if (write) { - if (pmd_pfn(*pmd) != pfn) { - WARN_ON_ONCE(!is_huge_zero_pmd(*pmd)); + if (pmd_pfn(pmdp_get(pmd)) != pfn) { + WARN_ON_ONCE(!is_huge_zero_pmd(pmdp_get(pmd))); goto out_unlock; } - entry = pmd_mkyoung(*pmd); + entry = pmd_mkyoung(pmdp_get(pmd)); entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); if (pmdp_set_access_flags(vma, addr, pmd, entry, 1)) update_mmu_cache_pmd(vma, addr, pmd); @@ -1544,14 +1544,14 @@ static vm_fault_t insert_pud(struct vm_area_struct *vma, unsigned long addr, return VM_FAULT_SIGBUS; ptl = pud_lock(mm, pud); - if (!pud_none(*pud)) { + if (!pud_none(pudp_get(pud))) { const unsigned long pfn = fop.is_folio ? 
folio_pfn(fop.folio) : fop.pfn; if (write) { - if (WARN_ON_ONCE(pud_pfn(*pud) != pfn)) + if (WARN_ON_ONCE(pud_pfn(pudp_get(pud)) != pfn)) goto out_unlock; - entry = pud_mkyoung(*pud); + entry = pud_mkyoung(pudp_get(pud)); entry = maybe_pud_mkwrite(pud_mkdirty(entry), vma); if (pudp_set_access_flags(vma, addr, pud, entry, 1)) update_mmu_cache_pud(vma, addr, pud); @@ -1647,7 +1647,7 @@ void touch_pmd(struct vm_area_struct *vma, unsigned long addr, { pmd_t _pmd; - _pmd = pmd_mkyoung(*pmd); + _pmd = pmd_mkyoung(pmdp_get(pmd)); if (write) _pmd = pmd_mkdirty(_pmd); if (pmdp_set_access_flags(vma, addr & HPAGE_PMD_MASK, @@ -1698,7 +1698,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); ret = -EAGAIN; - pmd = *src_pmd; + pmd = pmdp_get(src_pmd); #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION if (unlikely(is_swap_pmd(pmd))) { @@ -1709,9 +1709,9 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, entry = make_readable_migration_entry( swp_offset(entry)); pmd = swp_entry_to_pmd(entry); - if (pmd_swp_soft_dirty(*src_pmd)) + if (pmd_swp_soft_dirty(pmdp_get(src_pmd))) pmd = pmd_swp_mksoft_dirty(pmd); - if (pmd_swp_uffd_wp(*src_pmd)) + if (pmd_swp_uffd_wp(pmdp_get(src_pmd))) pmd = pmd_swp_mkuffd_wp(pmd); set_pmd_at(src_mm, addr, src_pmd, pmd); } @@ -1785,7 +1785,7 @@ void touch_pud(struct vm_area_struct *vma, unsigned long addr, { pud_t _pud; - _pud = pud_mkyoung(*pud); + _pud = pud_mkyoung(pudp_get(pud)); if (write) _pud = pud_mkdirty(_pud); if (pudp_set_access_flags(vma, addr & HPAGE_PUD_MASK, @@ -1806,7 +1806,7 @@ int copy_huge_pud(struct mm_struct *dst_mm, struct mm_struct *src_mm, spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); ret = -EAGAIN; - pud = *src_pud; + pud = pudp_get(src_pud); if (unlikely(!pud_trans_huge(pud))) goto out_unlock; @@ -1833,7 +1833,7 @@ void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud) bool write = vmf->flags & FAULT_FLAG_WRITE; vmf->ptl = pud_lock(vmf->vma->vm_mm, vmf->pud); - if (unlikely(!pud_same(*vmf->pud, orig_pud))) + if (unlikely(!pud_same(pudp_get(vmf->pud), orig_pud))) goto unlock; touch_pud(vmf->vma, vmf->address, vmf->pud, write); @@ -1847,7 +1847,7 @@ void huge_pmd_set_accessed(struct vm_fault *vmf) bool write = vmf->flags & FAULT_FLAG_WRITE; vmf->ptl = pmd_lock(vmf->vma->vm_mm, vmf->pmd); - if (unlikely(!pmd_same(*vmf->pmd, vmf->orig_pmd))) + if (unlikely(!pmd_same(pmdp_get(vmf->pmd), vmf->orig_pmd))) goto unlock; touch_pmd(vmf->vma, vmf->address, vmf->pmd, write); @@ -1912,7 +1912,7 @@ vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf) spin_lock(vmf->ptl); - if (unlikely(!pmd_same(*vmf->pmd, orig_pmd))) { + if (unlikely(!pmd_same(pmdp_get(vmf->pmd), orig_pmd))) { spin_unlock(vmf->ptl); return 0; } @@ -1930,7 +1930,7 @@ vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf) spin_unlock(vmf->ptl); folio_lock(folio); spin_lock(vmf->ptl); - if (unlikely(!pmd_same(*vmf->pmd, orig_pmd))) { + if (unlikely(!pmd_same(pmdp_get(vmf->pmd), orig_pmd))) { spin_unlock(vmf->ptl); folio_unlock(folio); folio_put(folio); @@ -2108,7 +2108,7 @@ bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, if (!ptl) goto out_unlocked; - orig_pmd = *pmd; + orig_pmd = pmdp_get(pmd); if (is_huge_zero_pmd(orig_pmd)) goto out; @@ -2296,8 +2296,8 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, * should have released it; but move_page_tables() might have already * inserted a page table, if racing against shmem/file collapse. 
*/ - if (!pmd_none(*new_pmd)) { - VM_BUG_ON(pmd_trans_huge(*new_pmd)); + if (!pmd_none(pmdp_get(new_pmd))) { + VM_BUG_ON(pmd_trans_huge(pmdp_get(new_pmd))); return false; } @@ -2313,7 +2313,7 @@ bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, pmd = pmdp_huge_get_and_clear(mm, old_addr, old_pmd); if (pmd_present(pmd)) force_flush = true; - VM_BUG_ON(!pmd_none(*new_pmd)); + VM_BUG_ON(!pmd_none(pmdp_get(new_pmd))); if (pmd_move_must_withdraw(new_ptl, old_ptl, vma)) { pgtable_t pgtable; @@ -2363,12 +2363,12 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, return 0; #ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION - if (is_swap_pmd(*pmd)) { - swp_entry_t entry = pmd_to_swp_entry(*pmd); + if (is_swap_pmd(pmdp_get(pmd))) { + swp_entry_t entry = pmd_to_swp_entry(pmdp_get(pmd)); struct folio *folio = pfn_swap_entry_folio(entry); pmd_t newpmd; - VM_BUG_ON(!is_pmd_migration_entry(*pmd)); + VM_BUG_ON(!is_pmd_migration_entry(pmdp_get(pmd))); if (is_writable_migration_entry(entry)) { /* * A protection check is difficult so @@ -2379,17 +2379,17 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, else entry = make_readable_migration_entry(swp_offset(entry)); newpmd = swp_entry_to_pmd(entry); - if (pmd_swp_soft_dirty(*pmd)) + if (pmd_swp_soft_dirty(pmdp_get(pmd))) newpmd = pmd_swp_mksoft_dirty(newpmd); } else { - newpmd = *pmd; + newpmd = pmdp_get(pmd); } if (uffd_wp) newpmd = pmd_swp_mkuffd_wp(newpmd); else if (uffd_wp_resolve) newpmd = pmd_swp_clear_uffd_wp(newpmd); - if (!pmd_same(*pmd, newpmd)) + if (!pmd_same(pmdp_get(pmd), newpmd)) set_pmd_at(mm, addr, pmd, newpmd); goto unlock; } @@ -2403,13 +2403,13 @@ int change_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, * data is likely to be read-cached on the local CPU and * local/remote hits to the zero page are not interesting. 
*/ - if (is_huge_zero_pmd(*pmd)) + if (is_huge_zero_pmd(pmdp_get(pmd))) goto unlock; - if (pmd_protnone(*pmd)) + if (pmd_protnone(pmdp_get(pmd))) goto unlock; - folio = pmd_folio(*pmd); + folio = pmd_folio(pmdp_get(pmd)); toptier = node_is_toptier(folio_nid(folio)); /* * Skip scanning top tier node if normal numa @@ -2540,7 +2540,7 @@ int move_pages_huge_pmd(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, pm struct mmu_notifier_range range; int err = 0; - src_pmdval = *src_pmd; + src_pmdval = pmdp_get(src_pmd); src_ptl = pmd_lockptr(mm, src_pmd); lockdep_assert_held(src_ptl); @@ -2602,8 +2602,8 @@ int move_pages_huge_pmd(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd, pm dst_ptl = pmd_lockptr(mm, dst_pmd); double_pt_lock(src_ptl, dst_ptl); - if (unlikely(!pmd_same(*src_pmd, src_pmdval) || - !pmd_same(*dst_pmd, dst_pmdval))) { + if (unlikely(!pmd_same(pmdp_get(src_pmd), src_pmdval) || + !pmd_same(pmdp_get(dst_pmd), dst_pmdval))) { err = -EAGAIN; goto unlock_ptls; } @@ -2669,7 +2669,7 @@ spinlock_t *__pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma) { spinlock_t *ptl; ptl = pmd_lock(vma->vm_mm, pmd); - if (likely(is_swap_pmd(*pmd) || pmd_trans_huge(*pmd))) + if (likely(is_swap_pmd(pmdp_get(pmd)) || pmd_trans_huge(pmdp_get(pmd)))) return ptl; spin_unlock(ptl); return NULL; @@ -2686,7 +2686,7 @@ spinlock_t *__pud_trans_huge_lock(pud_t *pud, struct vm_area_struct *vma) spinlock_t *ptl; ptl = pud_lock(vma->vm_mm, pud); - if (likely(pud_trans_huge(*pud))) + if (likely(pud_trans_huge(pudp_get(pud)))) return ptl; spin_unlock(ptl); return NULL; @@ -2738,7 +2738,7 @@ static void __split_huge_pud_locked(struct vm_area_struct *vma, pud_t *pud, VM_BUG_ON(haddr & ~HPAGE_PUD_MASK); VM_BUG_ON_VMA(vma->vm_start > haddr, vma); VM_BUG_ON_VMA(vma->vm_end < haddr + HPAGE_PUD_SIZE, vma); - VM_BUG_ON(!pud_trans_huge(*pud)); + VM_BUG_ON(!pud_trans_huge(pudp_get(pud))); count_vm_event(THP_SPLIT_PUD); @@ -2771,7 +2771,7 @@ void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud, (address & HPAGE_PUD_MASK) + HPAGE_PUD_SIZE); mmu_notifier_invalidate_range_start(&range); ptl = pud_lock(vma->vm_mm, pud); - if (unlikely(!pud_trans_huge(*pud))) + if (unlikely(!pud_trans_huge(pudp_get(pud)))) goto out; __split_huge_pud_locked(vma, pud, range.start); @@ -2844,7 +2844,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, VM_BUG_ON(haddr & ~HPAGE_PMD_MASK); VM_BUG_ON_VMA(vma->vm_start > haddr, vma); VM_BUG_ON_VMA(vma->vm_end < haddr + HPAGE_PMD_SIZE, vma); - VM_BUG_ON(!is_pmd_migration_entry(*pmd) && !pmd_trans_huge(*pmd)); + VM_BUG_ON(!is_pmd_migration_entry(pmdp_get(pmd)) && !pmd_trans_huge(pmdp_get(pmd))); count_vm_event(THP_SPLIT_PMD); @@ -2879,7 +2879,7 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, return; } - if (is_huge_zero_pmd(*pmd)) { + if (is_huge_zero_pmd(pmdp_get(pmd))) { /* * FIXME: Do we want to invalidate secondary mmu by calling * mmu_notifier_arch_invalidate_secondary_tlbs() see comments below @@ -2892,11 +2892,11 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd, return __split_huge_zero_page_pmd(vma, haddr, pmd); } - pmd_migration = is_pmd_migration_entry(*pmd); + pmd_migration = is_pmd_migration_entry(pmdp_get(pmd)); if (unlikely(pmd_migration)) { swp_entry_t entry; - old_pmd = *pmd; + old_pmd = pmdp_get(pmd); entry = pmd_to_swp_entry(old_pmd); page = pfn_swap_entry_to_page(entry); write = is_writable_migration_entry(entry); @@ -3052,7 +3052,7 @@ void split_huge_pmd_locked(struct 
vm_area_struct *vma, unsigned long address, pmd_t *pmd, bool freeze) { VM_WARN_ON_ONCE(!IS_ALIGNED(address, HPAGE_PMD_SIZE)); - if (pmd_trans_huge(*pmd) || is_pmd_migration_entry(*pmd)) + if (pmd_trans_huge(pmdp_get(pmd)) || is_pmd_migration_entry(pmdp_get(pmd))) __split_huge_pmd_locked(vma, pmd, address, freeze); } @@ -3140,7 +3140,7 @@ static bool __discard_anon_folio_pmd_locked(struct vm_area_struct *vma, { struct mm_struct *mm = vma->vm_mm; int ref_count, map_count; - pmd_t orig_pmd = *pmdp; + pmd_t orig_pmd = pmdp_get(pmdp); if (pmd_dirty(orig_pmd)) folio_set_dirty(folio); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 0455119716ec..41cbc85b5051 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -7584,7 +7584,7 @@ pte_t *huge_pmd_share(struct mm_struct *mm, struct vm_area_struct *vma, goto out; spin_lock(&mm->page_table_lock); - if (pud_none(*pud)) { + if (pud_none(pudp_get(pud))) { pud_populate(mm, pud, (pmd_t *)((unsigned long)spte & PAGE_MASK)); mm_inc_nr_pmds(mm); @@ -7677,7 +7677,7 @@ pte_t *huge_pte_alloc(struct mm_struct *mm, struct vm_area_struct *vma, pte = (pte_t *)pud; } else { BUG_ON(sz != PMD_SIZE); - if (want_pmd_share(vma, addr) && pud_none(*pud)) + if (want_pmd_share(vma, addr) && pud_none(pudp_get(pud))) pte = huge_pmd_share(mm, vma, addr, pud); else pte = (pte_t *)pmd_alloc(mm, pud, addr); @@ -7711,17 +7711,17 @@ pte_t *huge_pte_offset(struct mm_struct *mm, pmd_t *pmd; pgd = pgd_offset(mm, addr); - if (!pgd_present(*pgd)) + if (!pgd_present(pgdp_get(pgd))) return NULL; p4d = p4d_offset(pgd, addr); - if (!p4d_present(*p4d)) + if (!p4d_present(p4dp_get(p4d))) return NULL; pud = pud_offset(p4d, addr); if (sz == PUD_SIZE) /* must be pud huge, non-present or none */ return (pte_t *)pud; - if (!pud_present(*pud)) + if (!pud_present(pudp_get(pud))) return NULL; /* must have a valid entry and size to go further */ diff --git a/mm/hugetlb_vmemmap.c b/mm/hugetlb_vmemmap.c index ba0fb1b6a5a8..059eb78480f5 100644 --- a/mm/hugetlb_vmemmap.c +++ b/mm/hugetlb_vmemmap.c @@ -72,7 +72,7 @@ static int vmemmap_split_pmd(pmd_t *pmd, struct page *head, unsigned long start, } spin_lock(&init_mm.page_table_lock); - if (likely(pmd_leaf(*pmd))) { + if (likely(pmd_leaf(pmdp_get(pmd)))) { /* * Higher order allocations from buddy allocator must be able to * be treated as indepdenent small pages (as they can be freed @@ -106,7 +106,7 @@ static int vmemmap_pmd_entry(pmd_t *pmd, unsigned long addr, walk->action = ACTION_CONTINUE; spin_lock(&init_mm.page_table_lock); - head = pmd_leaf(*pmd) ? pmd_page(*pmd) : NULL; + head = pmd_leaf(pmdp_get(pmd)) ? 
pmd_page(pmdp_get(pmd)) : NULL; /* * Due to HugeTLB alignment requirements and the vmemmap * pages being at the start of the hotplugged memory diff --git a/mm/kasan/init.c b/mm/kasan/init.c index f084e7a5df1e..8e0fc4d0cd1e 100644 --- a/mm/kasan/init.c +++ b/mm/kasan/init.c @@ -121,7 +121,7 @@ static int __ref zero_pmd_populate(pud_t *pud, unsigned long addr, continue; } - if (pmd_none(*pmd)) { + if (pmd_none(pmdp_get(pmd))) { pte_t *p; if (slab_is_available()) @@ -160,7 +160,7 @@ static int __ref zero_pud_populate(p4d_t *p4d, unsigned long addr, continue; } - if (pud_none(*pud)) { + if (pud_none(pudp_get(pud))) { pmd_t *p; if (slab_is_available()) { @@ -202,7 +202,7 @@ static int __ref zero_p4d_populate(pgd_t *pgd, unsigned long addr, continue; } - if (p4d_none(*p4d)) { + if (p4d_none(p4dp_get(p4d))) { pud_t *p; if (slab_is_available()) { @@ -265,7 +265,7 @@ int __ref kasan_populate_early_shadow(const void *shadow_start, continue; } - if (pgd_none(*pgd)) { + if (pgd_none(pgdp_get(pgd))) { if (slab_is_available()) { if (!p4d_alloc(&init_mm, pgd, addr)) @@ -292,7 +292,8 @@ static void kasan_free_pte(pte_t *pte_start, pmd_t *pmd) return; } - pte_free_kernel(&init_mm, (pte_t *)page_to_virt(pmd_page(*pmd))); + pte_free_kernel(&init_mm, + (pte_t *)page_to_virt(pmd_page(pmdp_get(pmd)))); pmd_clear(pmd); } @@ -303,11 +304,11 @@ static void kasan_free_pmd(pmd_t *pmd_start, pud_t *pud) for (i = 0; i < PTRS_PER_PMD; i++) { pmd = pmd_start + i; - if (!pmd_none(*pmd)) + if (!pmd_none(pmdp_get(pmd))) return; } - pmd_free(&init_mm, (pmd_t *)page_to_virt(pud_page(*pud))); + pmd_free(&init_mm, (pmd_t *)page_to_virt(pud_page(pudp_get(pud)))); pud_clear(pud); } @@ -318,11 +319,11 @@ static void kasan_free_pud(pud_t *pud_start, p4d_t *p4d) for (i = 0; i < PTRS_PER_PUD; i++) { pud = pud_start + i; - if (!pud_none(*pud)) + if (!pud_none(pudp_get(pud))) return; } - pud_free(&init_mm, (pud_t *)page_to_virt(p4d_page(*p4d))); + pud_free(&init_mm, (pud_t *)page_to_virt(p4d_page(p4dp_get(p4d)))); p4d_clear(p4d); } @@ -333,11 +334,11 @@ static void kasan_free_p4d(p4d_t *p4d_start, pgd_t *pgd) for (i = 0; i < PTRS_PER_P4D; i++) { p4d = p4d_start + i; - if (!p4d_none(*p4d)) + if (!p4d_none(p4dp_get(p4d))) return; } - p4d_free(&init_mm, (p4d_t *)page_to_virt(pgd_page(*pgd))); + p4d_free(&init_mm, (p4d_t *)page_to_virt(pgd_page(pgdp_get(pgd)))); pgd_clear(pgd); } @@ -373,10 +374,10 @@ static void kasan_remove_pmd_table(pmd_t *pmd, unsigned long addr, next = pmd_addr_end(addr, end); - if (!pmd_present(*pmd)) + if (!pmd_present(pmdp_get(pmd))) continue; - if (kasan_pte_table(*pmd)) { + if (kasan_pte_table(pmdp_get(pmd))) { if (IS_ALIGNED(addr, PMD_SIZE) && IS_ALIGNED(next, PMD_SIZE)) { pmd_clear(pmd); @@ -399,10 +400,10 @@ static void kasan_remove_pud_table(pud_t *pud, unsigned long addr, next = pud_addr_end(addr, end); - if (!pud_present(*pud)) + if (!pud_present(pudp_get(pud))) continue; - if (kasan_pmd_table(*pud)) { + if (kasan_pmd_table(pudp_get(pud))) { if (IS_ALIGNED(addr, PUD_SIZE) && IS_ALIGNED(next, PUD_SIZE)) { pud_clear(pud); @@ -426,10 +427,10 @@ static void kasan_remove_p4d_table(p4d_t *p4d, unsigned long addr, next = p4d_addr_end(addr, end); - if (!p4d_present(*p4d)) + if (!p4d_present(p4dp_get(p4d))) continue; - if (kasan_pud_table(*p4d)) { + if (kasan_pud_table(p4dp_get(p4d))) { if (IS_ALIGNED(addr, P4D_SIZE) && IS_ALIGNED(next, P4D_SIZE)) { p4d_clear(p4d); @@ -460,10 +461,10 @@ void kasan_remove_zero_shadow(void *start, unsigned long size) next = pgd_addr_end(addr, end); pgd = pgd_offset_k(addr); - if 
(!pgd_present(*pgd)) + if (!pgd_present(pgdp_get(pgd))) continue; - if (kasan_p4d_table(*pgd)) { + if (kasan_p4d_table(pgdp_get(pgd))) { if (IS_ALIGNED(addr, PGDIR_SIZE) && IS_ALIGNED(next, PGDIR_SIZE)) { pgd_clear(pgd); diff --git a/mm/kasan/shadow.c b/mm/kasan/shadow.c index 5d2a876035d6..331bbb7ff025 100644 --- a/mm/kasan/shadow.c +++ b/mm/kasan/shadow.c @@ -191,20 +191,20 @@ static bool shadow_mapped(unsigned long addr) pmd_t *pmd; pte_t *pte; - if (pgd_none(*pgd)) + if (pgd_none(pgdp_get(pgd))) return false; p4d = p4d_offset(pgd, addr); - if (p4d_none(*p4d)) + if (p4d_none(p4dp_get(p4d))) return false; pud = pud_offset(p4d, addr); - if (pud_none(*pud)) + if (pud_none(pudp_get(pud))) return false; - if (pud_leaf(*pud)) + if (pud_leaf(pudp_get(pud))) return true; pmd = pmd_offset(pud, addr); - if (pmd_none(*pmd)) + if (pmd_none(pmdp_get(pmd))) return false; - if (pmd_leaf(*pmd)) + if (pmd_leaf(pmdp_get(pmd))) return true; pte = pte_offset_kernel(pmd, addr); return !pte_none(ptep_get(pte)); diff --git a/mm/khugepaged.c b/mm/khugepaged.c index abe54f0043c7..1bff8ade751a 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1191,7 +1191,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address, if (pte) pte_unmap(pte); spin_lock(pmd_ptl); - BUG_ON(!pmd_none(*pmd)); + BUG_ON(!pmd_none(pmdp_get(pmd))); /* * We can only use set_pmd_at when establishing * hugepmds and never for establishing regular pmds that @@ -1228,7 +1228,7 @@ static int collapse_huge_page(struct mm_struct *mm, unsigned long address, _pmd = maybe_pmd_mkwrite(pmd_mkdirty(_pmd), vma); spin_lock(pmd_ptl); - BUG_ON(!pmd_none(*pmd)); + BUG_ON(!pmd_none(pmdp_get(pmd))); folio_add_new_anon_rmap(folio, vma, address, RMAP_EXCLUSIVE); folio_add_lru_vma(folio, vma); pgtable_trans_huge_deposit(mm, pmd, pgtable); diff --git a/mm/ksm.c b/mm/ksm.c index c4e730409949..0a0eeb667fe6 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -1322,7 +1322,7 @@ static int write_protect_page(struct vm_area_struct *vma, struct folio *folio, set_pte_at(mm, pvmw.address, pvmw.pte, entry); } - *orig_pte = entry; + set_pte(orig_pte, entry); err = 0; out_unlock: diff --git a/mm/madvise.c b/mm/madvise.c index fb1c86e630b6..53e60565f3e5 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -377,7 +377,7 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd, !can_do_file_pageout(vma); #ifdef CONFIG_TRANSPARENT_HUGEPAGE - if (pmd_trans_huge(*pmd)) { + if (pmd_trans_huge(pmdp_get(pmd))) { pmd_t orig_pmd; unsigned long next = pmd_addr_end(addr, end); @@ -386,7 +386,7 @@ static int madvise_cold_or_pageout_pte_range(pmd_t *pmd, if (!ptl) return 0; - orig_pmd = *pmd; + orig_pmd = pmdp_get(pmd); if (is_huge_zero_pmd(orig_pmd)) goto huge_unlock; @@ -668,7 +668,7 @@ static int madvise_free_pte_range(pmd_t *pmd, unsigned long addr, int nr, max_nr; next = pmd_addr_end(addr, end); - if (pmd_trans_huge(*pmd)) + if (pmd_trans_huge(pmdp_get(pmd))) if (madvise_free_huge_pmd(tlb, vma, pmd, addr, next)) return 0; @@ -1116,7 +1116,7 @@ static int guard_install_set_pte(unsigned long addr, unsigned long next, unsigned long *nr_pages = (unsigned long *)walk->private; /* Simply install a PTE marker, this causes segfault on access. 
*/ - *ptep = make_pte_marker(PTE_MARKER_GUARD); + set_pte(ptep, make_pte_marker(PTE_MARKER_GUARD)); (*nr_pages)++; return 0; diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 3edebb0cda30..5231febc6345 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -339,20 +339,20 @@ static unsigned long dev_pagemap_mapping_shift(struct vm_area_struct *vma, VM_BUG_ON_VMA(address == -EFAULT, vma); pgd = pgd_offset(vma->vm_mm, address); - if (!pgd_present(*pgd)) + if (!pgd_present(pgdp_get(pgd))) return 0; p4d = p4d_offset(pgd, address); - if (!p4d_present(*p4d)) + if (!p4d_present(p4dp_get(p4d))) return 0; pud = pud_offset(p4d, address); - if (!pud_present(*pud)) + if (!pud_present(pudp_get(pud))) return 0; - if (pud_trans_huge(*pud)) + if (pud_trans_huge(pudp_get(pud))) return PUD_SHIFT; pmd = pmd_offset(pud, address); - if (!pmd_present(*pmd)) + if (!pmd_present(pmdp_get(pmd))) return 0; - if (pmd_trans_huge(*pmd)) + if (pmd_trans_huge(pmdp_get(pmd))) return PMD_SHIFT; pte = pte_offset_map(pmd, address); if (!pte) @@ -705,7 +705,7 @@ static int check_hwpoisoned_entry(pte_t pte, unsigned long addr, short shift, static int check_hwpoisoned_pmd_entry(pmd_t *pmdp, unsigned long addr, struct hwpoison_walk *hwp) { - pmd_t pmd = *pmdp; + pmd_t pmd = pmdp_get(pmdp); unsigned long pfn; unsigned long hwpoison_vaddr; diff --git a/mm/memory.c b/mm/memory.c index 0c295e2fe8e8..1880bae463c6 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -189,7 +189,7 @@ void mm_trace_rss_stat(struct mm_struct *mm, int member) static void free_pte_range(struct mmu_gather *tlb, pmd_t *pmd, unsigned long addr) { - pgtable_t token = pmd_pgtable(*pmd); + pgtable_t token = pmd_pgtable(pmdp_get(pmd)); pmd_clear(pmd); pte_free_tlb(tlb, token, addr); mm_dec_nr_ptes(tlb->mm); @@ -426,7 +426,7 @@ void pmd_install(struct mm_struct *mm, pmd_t *pmd, pgtable_t *pte) { spinlock_t *ptl = pmd_lock(mm, pmd); - if (likely(pmd_none(*pmd))) { /* Has another populated it ? */ + if (likely(pmd_none(pmdp_get(pmd)))) { /* Has another populated it ? */ mm_inc_nr_ptes(mm); /* * Ensure all pte setup (eg. pte page lock and page clearing) are @@ -467,7 +467,7 @@ int __pte_alloc_kernel(pmd_t *pmd) return -ENOMEM; spin_lock(&init_mm.page_table_lock); - if (likely(pmd_none(*pmd))) { /* Has another populated it ? */ + if (likely(pmd_none(pmdp_get(pmd)))) { /* Has another populated it ? */ smp_wmb(); /* See comment in pmd_install() */ pmd_populate_kernel(&init_mm, pmd, new); new = NULL; @@ -532,9 +532,9 @@ static void __print_bad_page_map_pgtable(struct mm_struct *mm, unsigned long add * see locking requirements for print_bad_page_map(). 
*/ pgdp = pgd_offset(mm, addr); - pgdv = pgd_val(*pgdp); + pgdv = pgd_val(pgdp_get(pgdp)); - if (!pgd_present(*pgdp) || pgd_leaf(*pgdp)) { + if (!pgd_present(pgdp_get(pgdp)) || pgd_leaf(pgdp_get(pgdp))) { pr_alert("pgd:%08llx\n", pgdv); return; } @@ -1374,7 +1374,7 @@ copy_pmd_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma, src_pmd = pmd_offset(src_pud, addr); do { next = pmd_addr_end(addr, end); - if (is_swap_pmd(*src_pmd) || pmd_trans_huge(*src_pmd)) { + if (is_swap_pmd(pmdp_get(src_pmd)) || pmd_trans_huge(pmdp_get(src_pmd))) { int err; VM_BUG_ON_VMA(next-addr != HPAGE_PMD_SIZE, src_vma); err = copy_huge_pmd(dst_mm, src_mm, dst_pmd, src_pmd, @@ -1410,7 +1410,7 @@ copy_pud_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma, src_pud = pud_offset(src_p4d, addr); do { next = pud_addr_end(addr, end); - if (pud_trans_huge(*src_pud)) { + if (pud_trans_huge(pudp_get(src_pud))) { int err; VM_BUG_ON_VMA(next-addr != HPAGE_PUD_SIZE, src_vma); @@ -1921,7 +1921,7 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb, pmd = pmd_offset(pud, addr); do { next = pmd_addr_end(addr, end); - if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd)) { + if (is_swap_pmd(pmdp_get(pmd)) || pmd_trans_huge(pmdp_get(pmd))) { if (next - addr != HPAGE_PMD_SIZE) __split_huge_pmd(vma, pmd, addr, false); else if (zap_huge_pmd(tlb, vma, pmd, addr)) { @@ -1931,7 +1931,7 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb, /* fall through */ } else if (details && details->single_folio && folio_test_pmd_mappable(details->single_folio) && - next - addr == HPAGE_PMD_SIZE && pmd_none(*pmd)) { + next - addr == HPAGE_PMD_SIZE && pmd_none(pmdp_get(pmd))) { spinlock_t *ptl = pmd_lock(tlb->mm, pmd); /* * Take and drop THP pmd lock so that we cannot return @@ -1940,7 +1940,7 @@ static inline unsigned long zap_pmd_range(struct mmu_gather *tlb, */ spin_unlock(ptl); } - if (pmd_none(*pmd)) { + if (pmd_none(pmdp_get(pmd))) { addr = next; continue; } @@ -1963,7 +1963,7 @@ static inline unsigned long zap_pud_range(struct mmu_gather *tlb, pud = pud_offset(p4d, addr); do { next = pud_addr_end(addr, end); - if (pud_trans_huge(*pud)) { + if (pud_trans_huge(pudp_get(pud))) { if (next - addr != HPAGE_PUD_SIZE) { mmap_assert_locked(tlb->mm); split_huge_pud(vma, pud, addr); @@ -2211,7 +2211,7 @@ static pmd_t *walk_to_pmd(struct mm_struct *mm, unsigned long addr) if (!pmd) return NULL; - VM_BUG_ON(pmd_trans_huge(*pmd)); + VM_BUG_ON(pmd_trans_huge(pmdp_get(pmd))); return pmd; } @@ -2845,7 +2845,7 @@ static inline int remap_pmd_range(struct mm_struct *mm, pud_t *pud, pmd = pmd_alloc(mm, pud, addr); if (!pmd) return -ENOMEM; - VM_BUG_ON(pmd_trans_huge(*pmd)); + VM_BUG_ON(pmd_trans_huge(pmdp_get(pmd))); do { next = pmd_addr_end(addr, end); err = remap_pte_range(mm, pmd, addr, next, @@ -3164,7 +3164,7 @@ static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud, unsigned long next; int err = 0; - BUG_ON(pud_leaf(*pud)); + BUG_ON(pud_leaf(pudp_get(pud))); if (create) { pmd = pmd_alloc_track(mm, pud, addr, mask); @@ -3175,11 +3175,11 @@ static int apply_to_pmd_range(struct mm_struct *mm, pud_t *pud, } do { next = pmd_addr_end(addr, end); - if (pmd_none(*pmd) && !create) + if (pmd_none(pmdp_get(pmd)) && !create) continue; - if (WARN_ON_ONCE(pmd_leaf(*pmd))) + if (WARN_ON_ONCE(pmd_leaf(pmdp_get(pmd)))) return -EINVAL; - if (!pmd_none(*pmd) && WARN_ON_ONCE(pmd_bad(*pmd))) { + if (!pmd_none(pmdp_get(pmd)) && WARN_ON_ONCE(pmd_bad(pmdp_get(pmd)))) { if (!create) continue; pmd_clear_bad(pmd); @@ 
-3211,11 +3211,11 @@ static int apply_to_pud_range(struct mm_struct *mm, p4d_t *p4d, } do { next = pud_addr_end(addr, end); - if (pud_none(*pud) && !create) + if (pud_none(pudp_get(pud)) && !create) continue; - if (WARN_ON_ONCE(pud_leaf(*pud))) + if (WARN_ON_ONCE(pud_leaf(pudp_get(pud)))) return -EINVAL; - if (!pud_none(*pud) && WARN_ON_ONCE(pud_bad(*pud))) { + if (!pud_none(pudp_get(pud)) && WARN_ON_ONCE(pud_bad(pudp_get(pud)))) { if (!create) continue; pud_clear_bad(pud); @@ -3247,11 +3247,11 @@ static int apply_to_p4d_range(struct mm_struct *mm, pgd_t *pgd, } do { next = p4d_addr_end(addr, end); - if (p4d_none(*p4d) && !create) + if (p4d_none(p4dp_get(p4d)) && !create) continue; - if (WARN_ON_ONCE(p4d_leaf(*p4d))) + if (WARN_ON_ONCE(p4d_leaf(p4dp_get(p4d)))) return -EINVAL; - if (!p4d_none(*p4d) && WARN_ON_ONCE(p4d_bad(*p4d))) { + if (!p4d_none(p4dp_get(p4d)) && WARN_ON_ONCE(p4d_bad(p4dp_get(p4d)))) { if (!create) continue; p4d_clear_bad(p4d); @@ -3281,13 +3281,13 @@ static int __apply_to_page_range(struct mm_struct *mm, unsigned long addr, pgd = pgd_offset(mm, addr); do { next = pgd_addr_end(addr, end); - if (pgd_none(*pgd) && !create) + if (pgd_none(pgdp_get(pgd)) && !create) continue; - if (WARN_ON_ONCE(pgd_leaf(*pgd))) { + if (WARN_ON_ONCE(pgd_leaf(pgdp_get(pgd)))) { err = -EINVAL; break; } - if (!pgd_none(*pgd) && WARN_ON_ONCE(pgd_bad(*pgd))) { + if (!pgd_none(pgdp_get(pgd)) && WARN_ON_ONCE(pgd_bad(pgdp_get(pgd)))) { if (!create) continue; pgd_clear_bad(pgd); @@ -5272,7 +5272,7 @@ static vm_fault_t __do_fault(struct vm_fault *vmf) * unlock_page(B) * # flush A, B to clear the writeback */ - if (pmd_none(*vmf->pmd) && !vmf->prealloc_pte) { + if (pmd_none(pmdp_get(vmf->pmd)) && !vmf->prealloc_pte) { vmf->prealloc_pte = pte_alloc_one(vma->vm_mm); if (!vmf->prealloc_pte) return VM_FAULT_OOM; @@ -5367,7 +5367,7 @@ vm_fault_t do_set_pmd(struct vm_fault *vmf, struct folio *folio, struct page *pa } vmf->ptl = pmd_lock(vma->vm_mm, vmf->pmd); - if (unlikely(!pmd_none(*vmf->pmd))) + if (unlikely(!pmd_none(pmdp_get(vmf->pmd)))) goto out; flush_icache_pages(vma, page, HPAGE_PMD_NR); @@ -5519,7 +5519,7 @@ vm_fault_t finish_fault(struct vm_fault *vmf) file_end < folio_next_index(folio); } - if (pmd_none(*vmf->pmd)) { + if (pmd_none(pmdp_get(vmf->pmd))) { if (!needs_fallback && folio_test_pmd_mappable(folio)) { ret = do_set_pmd(vmf, folio, page); if (ret != VM_FAULT_FALLBACK) @@ -5664,7 +5664,7 @@ static vm_fault_t do_fault_around(struct vm_fault *vmf) to_pte = min3(from_pte + nr_pages, (pgoff_t)PTRS_PER_PTE, pte_off + vma_pages(vmf->vma) - vma_off) - 1; - if (pmd_none(*vmf->pmd)) { + if (pmd_none(pmdp_get(vmf->pmd))) { vmf->prealloc_pte = pte_alloc_one(vmf->vma->vm_mm); if (!vmf->prealloc_pte) return VM_FAULT_OOM; @@ -6152,7 +6152,7 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf) { pte_t entry; - if (unlikely(pmd_none(*vmf->pmd))) { + if (unlikely(pmd_none(pmdp_get(vmf->pmd)))) { /* * Leave __pte_alloc() until later: because vm_ops->fault may * want to allocate huge page, and if we expose page table @@ -6268,13 +6268,13 @@ static vm_fault_t __handle_mm_fault(struct vm_area_struct *vma, if (!vmf.pud) return VM_FAULT_OOM; retry_pud: - if (pud_none(*vmf.pud) && + if (pud_none(pudp_get(vmf.pud)) && thp_vma_allowable_order(vma, vm_flags, TVA_PAGEFAULT, PUD_ORDER)) { ret = create_huge_pud(&vmf); if (!(ret & VM_FAULT_FALLBACK)) return ret; } else { - pud_t orig_pud = *vmf.pud; + pud_t orig_pud = pudp_get(vmf.pud); barrier(); if (pud_trans_huge(orig_pud)) { @@ -6302,7 +6302,7 @@ static 
vm_fault_t __handle_mm_fault(struct vm_area_struct *vma, if (pud_trans_unstable(vmf.pud)) goto retry_pud; - if (pmd_none(*vmf.pmd) && + if (pmd_none(pmdp_get(vmf.pmd)) && thp_vma_allowable_order(vma, vm_flags, TVA_PAGEFAULT, PMD_ORDER)) { ret = create_huge_pmd(&vmf); if (!(ret & VM_FAULT_FALLBACK)) @@ -6546,7 +6546,7 @@ int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) return -ENOMEM; spin_lock(&mm->page_table_lock); - if (pgd_present(*pgd)) { /* Another has populated it */ + if (pgd_present(pgdp_get(pgd))) { /* Another has populated it */ p4d_free(mm, new); } else { smp_wmb(); /* See comment in pmd_install() */ @@ -6569,7 +6569,7 @@ int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address) return -ENOMEM; spin_lock(&mm->page_table_lock); - if (!p4d_present(*p4d)) { + if (!p4d_present(p4dp_get(p4d))) { mm_inc_nr_puds(mm); smp_wmb(); /* See comment in pmd_install() */ p4d_populate(mm, p4d, new); @@ -6593,7 +6593,7 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) return -ENOMEM; ptl = pud_lock(mm, pud); - if (!pud_present(*pud)) { + if (!pud_present(pudp_get(pud))) { mm_inc_nr_pmds(mm); smp_wmb(); /* See comment in pmd_install() */ pud_populate(mm, pud, new); @@ -6686,7 +6686,7 @@ int follow_pfnmap_start(struct follow_pfnmap_args *args) goto out; retry: pgdp = pgd_offset(mm, address); - if (pgd_none(*pgdp) || unlikely(pgd_bad(*pgdp))) + if (pgd_none(pgdp_get(pgdp)) || unlikely(pgd_bad(pgdp_get(pgdp)))) goto out; p4dp = p4d_offset(pgdp, address); diff --git a/mm/mempolicy.c b/mm/mempolicy.c index eb83cff7db8c..8eef680d0f0e 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -645,11 +645,11 @@ static void queue_folios_pmd(pmd_t *pmd, struct mm_walk *walk) struct folio *folio; struct queue_pages *qp = walk->private; - if (unlikely(is_pmd_migration_entry(*pmd))) { + if (unlikely(is_pmd_migration_entry(pmdp_get(pmd)))) { qp->nr_failed++; return; } - folio = pmd_folio(*pmd); + folio = pmd_folio(pmdp_get(pmd)); if (is_huge_zero_folio(folio)) { walk->action = ACTION_CONTINUE; return; diff --git a/mm/migrate.c b/mm/migrate.c index c0e9f15be2a2..98b5fe2a8994 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -542,9 +542,9 @@ void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd) spinlock_t *ptl; ptl = pmd_lock(mm, pmd); - if (!is_pmd_migration_entry(*pmd)) + if (!is_pmd_migration_entry(pmdp_get(pmd))) goto unlock; - migration_entry_wait_on_locked(pmd_to_swp_entry(*pmd), ptl); + migration_entry_wait_on_locked(pmd_to_swp_entry(pmdp_get(pmd)), ptl); return; unlock: spin_unlock(ptl); diff --git a/mm/migrate_device.c b/mm/migrate_device.c index abd9f6850db6..9714448eb97d 100644 --- a/mm/migrate_device.c +++ b/mm/migrate_device.c @@ -69,19 +69,19 @@ static int migrate_vma_collect_pmd(pmd_t *pmdp, pte_t *ptep; again: - if (pmd_none(*pmdp)) + if (pmd_none(pmdp_get(pmdp))) return migrate_vma_collect_hole(start, end, -1, walk); - if (pmd_trans_huge(*pmdp)) { + if (pmd_trans_huge(pmdp_get(pmdp))) { struct folio *folio; ptl = pmd_lock(mm, pmdp); - if (unlikely(!pmd_trans_huge(*pmdp))) { + if (unlikely(!pmd_trans_huge(pmdp_get(pmdp)))) { spin_unlock(ptl); goto again; } - folio = pmd_folio(*pmdp); + folio = pmd_folio(pmdp_get(pmdp)); if (is_huge_zero_folio(folio)) { spin_unlock(ptl); split_huge_pmd(vma, pmdp, addr); @@ -615,7 +615,7 @@ static void migrate_vma_insert_page(struct migrate_vma *migrate, pmdp = pmd_alloc(mm, pudp, addr); if (!pmdp) goto abort; - if (pmd_trans_huge(*pmdp)) + if (pmd_trans_huge(pmdp_get(pmdp))) goto abort; if 
(pte_alloc(mm, pmdp)) goto abort; diff --git a/mm/mlock.c b/mm/mlock.c index bb0776f5ef7c..c55ab38656d0 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -361,11 +361,11 @@ static int mlock_pte_range(pmd_t *pmd, unsigned long addr, ptl = pmd_trans_huge_lock(pmd, vma); if (ptl) { - if (!pmd_present(*pmd)) + if (!pmd_present(pmdp_get(pmd))) goto out; - if (is_huge_zero_pmd(*pmd)) + if (is_huge_zero_pmd(pmdp_get(pmd))) goto out; - folio = pmd_folio(*pmd); + folio = pmd_folio(pmdp_get(pmd)); if (folio_is_zone_device(folio)) goto out; if (vma->vm_flags & VM_LOCKED) diff --git a/mm/mprotect.c b/mm/mprotect.c index 988c366137d5..912a5847a4f3 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -530,7 +530,7 @@ static inline long change_pmd_range(struct mmu_gather *tlb, break; } - if (pmd_none(*pmd)) + if (pmd_none(pmdp_get(pmd))) goto next; _pmd = pmdp_get_lockless(pmd); diff --git a/mm/mremap.c b/mm/mremap.c index 419a0ea0a870..5b43ef4ff547 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -103,7 +103,7 @@ static pmd_t *get_old_pmd(struct mm_struct *mm, unsigned long addr) return NULL; pmd = pmd_offset(pud, addr); - if (pmd_none(*pmd)) + if (pmd_none(pmdp_get(pmd))) return NULL; return pmd; @@ -135,7 +135,7 @@ static pmd_t *alloc_new_pmd(struct mm_struct *mm, unsigned long addr) if (!pmd) return NULL; - VM_BUG_ON(pmd_trans_huge(*pmd)); + VM_BUG_ON(pmd_trans_huge(pmdp_get(pmd))); return pmd; } @@ -260,7 +260,7 @@ static int move_ptes(struct pagetable_move_control *pmc, for (; old_addr < old_end; old_ptep += nr_ptes, old_addr += nr_ptes * PAGE_SIZE, new_ptep += nr_ptes, new_addr += nr_ptes * PAGE_SIZE) { - VM_WARN_ON_ONCE(!pte_none(*new_ptep)); + VM_WARN_ON_ONCE(!pte_none(ptep_get(new_ptep))); nr_ptes = 1; max_nr_ptes = (old_end - old_addr) >> PAGE_SHIFT; @@ -379,7 +379,7 @@ static bool move_normal_pmd(struct pagetable_move_control *pmc, * One alternative might be to just unmap the target pmd at * this point, and verify that it really is empty. We'll see. */ - if (WARN_ON_ONCE(!pmd_none(*new_pmd))) + if (WARN_ON_ONCE(!pmd_none(pmdp_get(new_pmd)))) return false; /* @@ -391,7 +391,7 @@ static bool move_normal_pmd(struct pagetable_move_control *pmc, if (new_ptl != old_ptl) spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING); - pmd = *old_pmd; + pmd = pmdp_get(old_pmd); /* Racing with collapse? */ if (unlikely(!pmd_present(pmd) || pmd_leaf(pmd))) @@ -400,7 +400,7 @@ static bool move_normal_pmd(struct pagetable_move_control *pmc, pmd_clear(old_pmd); res = true; - VM_BUG_ON(!pmd_none(*new_pmd)); + VM_BUG_ON(!pmd_none(pmdp_get(new_pmd))); pmd_populate(mm, new_pmd, pmd_pgtable(pmd)); flush_tlb_range(vma, pmc->old_addr, pmc->old_addr + PMD_SIZE); @@ -436,7 +436,7 @@ static bool move_normal_pud(struct pagetable_move_control *pmc, * The destination pud shouldn't be established, free_pgtables() * should have released it. */ - if (WARN_ON_ONCE(!pud_none(*new_pud))) + if (WARN_ON_ONCE(!pud_none(pudp_get(new_pud)))) return false; /* @@ -449,10 +449,10 @@ static bool move_normal_pud(struct pagetable_move_control *pmc, spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING); /* Clear the pud */ - pud = *old_pud; + pud = pudp_get(old_pud); pud_clear(old_pud); - VM_BUG_ON(!pud_none(*new_pud)); + VM_BUG_ON(!pud_none(pudp_get(new_pud))); pud_populate(mm, new_pud, pud_pgtable(pud)); flush_tlb_range(vma, pmc->old_addr, pmc->old_addr + PUD_SIZE); @@ -483,7 +483,7 @@ static bool move_huge_pud(struct pagetable_move_control *pmc, * The destination pud shouldn't be established, free_pgtables() * should have released it. 
*/ - if (WARN_ON_ONCE(!pud_none(*new_pud))) + if (WARN_ON_ONCE(!pud_none(pudp_get(new_pud)))) return false; /* @@ -496,10 +496,10 @@ static bool move_huge_pud(struct pagetable_move_control *pmc, spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING); /* Clear the pud */ - pud = *old_pud; + pud = pudp_get(old_pud); pud_clear(old_pud); - VM_BUG_ON(!pud_none(*new_pud)); + VM_BUG_ON(!pud_none(pudp_get(new_pud))); /* Set the new pud */ /* mark soft_ditry when we add pud level soft dirty support */ @@ -828,7 +828,7 @@ unsigned long move_page_tables(struct pagetable_move_control *pmc) new_pud = alloc_new_pud(mm, pmc->new_addr); if (!new_pud) break; - if (pud_trans_huge(*old_pud)) { + if (pud_trans_huge(pudp_get(old_pud))) { if (extent == HPAGE_PUD_SIZE) { move_pgt_entry(pmc, HPAGE_PUD, old_pud, new_pud); /* We ignore and continue on error? */ @@ -847,7 +847,7 @@ unsigned long move_page_tables(struct pagetable_move_control *pmc) if (!new_pmd) break; again: - if (is_swap_pmd(*old_pmd) || pmd_trans_huge(*old_pmd)) { + if (is_swap_pmd(pmdp_get(old_pmd)) || pmd_trans_huge(pmdp_get(old_pmd))) { if (extent == HPAGE_PMD_SIZE && move_pgt_entry(pmc, HPAGE_PMD, old_pmd, new_pmd)) continue; @@ -861,7 +861,7 @@ unsigned long move_page_tables(struct pagetable_move_control *pmc) if (move_pgt_entry(pmc, NORMAL_PMD, old_pmd, new_pmd)) continue; } - if (pmd_none(*old_pmd)) + if (pmd_none(pmdp_get(old_pmd))) continue; if (pte_alloc(pmc->new->vm_mm, new_pmd)) break; diff --git a/mm/page_table_check.c b/mm/page_table_check.c index 4eeca782b888..31f4c39d20ef 100644 --- a/mm/page_table_check.c +++ b/mm/page_table_check.c @@ -230,7 +230,7 @@ void __page_table_check_pmds_set(struct mm_struct *mm, pmd_t *pmdp, pmd_t pmd, page_table_check_pmd_flags(pmd); for (i = 0; i < nr; i++) - __page_table_check_pmd_clear(mm, *(pmdp + i)); + __page_table_check_pmd_clear(mm, pmdp_get(pmdp + i)); if (pmd_user_accessible_page(pmd)) page_table_check_set(pmd_pfn(pmd), stride * nr, pmd_write(pmd)); } @@ -246,7 +246,7 @@ void __page_table_check_puds_set(struct mm_struct *mm, pud_t *pudp, pud_t pud, return; for (i = 0; i < nr; i++) - __page_table_check_pud_clear(mm, *(pudp + i)); + __page_table_check_pud_clear(mm, pudp_get((pudp + i))); if (pud_user_accessible_page(pud)) page_table_check_set(pud_pfn(pud), stride * nr, pud_write(pud)); } diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c index c498a91b6706..6c08d0215308 100644 --- a/mm/page_vma_mapped.c +++ b/mm/page_vma_mapped.c @@ -223,17 +223,17 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw) restart: do { pgd = pgd_offset(mm, pvmw->address); - if (!pgd_present(*pgd)) { + if (!pgd_present(pgdp_get(pgd))) { step_forward(pvmw, PGDIR_SIZE); continue; } p4d = p4d_offset(pgd, pvmw->address); - if (!p4d_present(*p4d)) { + if (!p4d_present(p4dp_get(p4d))) { step_forward(pvmw, P4D_SIZE); continue; } pud = pud_offset(p4d, pvmw->address); - if (!pud_present(*pud)) { + if (!pud_present(pudp_get(pud))) { step_forward(pvmw, PUD_SIZE); continue; } diff --git a/mm/pagewalk.c b/mm/pagewalk.c index 9f91cf85a5be..269ba20b63cf 100644 --- a/mm/pagewalk.c +++ b/mm/pagewalk.c @@ -109,7 +109,7 @@ static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end, do { again: next = pmd_addr_end(addr, end); - if (pmd_none(*pmd)) { + if (pmd_none(pmdp_get(pmd))) { if (has_install) err = __pte_alloc(walk->mm, pmd); else if (ops->pte_hole) @@ -143,13 +143,13 @@ static int walk_pmd_range(pud_t *pud, unsigned long addr, unsigned long end, * We are ONLY installing, so avoid unnecessarily * 
splitting a present huge page. */ - if (pmd_present(*pmd) && pmd_trans_huge(*pmd)) + if (pmd_present(pmdp_get(pmd)) && pmd_trans_huge(pmdp_get(pmd))) continue; } if (walk->vma) split_huge_pmd(walk->vma, pmd, addr); - else if (pmd_leaf(*pmd) || !pmd_present(*pmd)) + else if (pmd_leaf(pmdp_get(pmd)) || !pmd_present(pmdp_get(pmd))) continue; /* Nothing to do. */ err = walk_pte_range(pmd, addr, next, walk); @@ -179,7 +179,7 @@ static int walk_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end, do { again: next = pud_addr_end(addr, end); - if (pud_none(*pud)) { + if (pud_none(pudp_get(pud))) { if (has_install) err = __pmd_alloc(walk->mm, pud, addr); else if (ops->pte_hole) @@ -209,16 +209,16 @@ static int walk_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end, * We are ONLY installing, so avoid unnecessarily * splitting a present huge page. */ - if (pud_present(*pud) && pud_trans_huge(*pud)) + if (pud_present(pudp_get(pud)) && pud_trans_huge(pudp_get(pud))) continue; } if (walk->vma) split_huge_pud(walk->vma, pud, addr); - else if (pud_leaf(*pud) || !pud_present(*pud)) + else if (pud_leaf(pudp_get(pud)) || !pud_present(pudp_get(pud))) continue; /* Nothing to do. */ - if (pud_none(*pud)) + if (pud_none(pudp_get(pud))) goto again; err = walk_pmd_range(pud, addr, next, walk); diff --git a/mm/percpu.c b/mm/percpu.c index 81462ce5866e..1652beb28917 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -3136,25 +3136,25 @@ void __init __weak pcpu_populate_pte(unsigned long addr) pud_t *pud; pmd_t *pmd; - if (pgd_none(*pgd)) { + if (pgd_none(pgdp_get(pgd))) { p4d = memblock_alloc_or_panic(P4D_TABLE_SIZE, P4D_TABLE_SIZE); pgd_populate_kernel(addr, pgd, p4d); } p4d = p4d_offset(pgd, addr); - if (p4d_none(*p4d)) { + if (p4d_none(p4dp_get(p4d))) { pud = memblock_alloc_or_panic(PUD_TABLE_SIZE, PUD_TABLE_SIZE); p4d_populate_kernel(addr, p4d, pud); } pud = pud_offset(p4d, addr); - if (pud_none(*pud)) { + if (pud_none(pudp_get(pud))) { pmd = memblock_alloc_or_panic(PMD_TABLE_SIZE, PMD_TABLE_SIZE); pud_populate(&init_mm, pud, pmd); } pmd = pmd_offset(pud, addr); - if (!pmd_present(*pmd)) { + if (!pmd_present(pmdp_get(pmd))) { pte_t *new; new = memblock_alloc_or_panic(PTE_TABLE_SIZE, PTE_TABLE_SIZE); diff --git a/mm/pgalloc-track.h b/mm/pgalloc-track.h index e9e879de8649..c5bb948416f0 100644 --- a/mm/pgalloc-track.h +++ b/mm/pgalloc-track.h @@ -7,7 +7,7 @@ static inline p4d_t *p4d_alloc_track(struct mm_struct *mm, pgd_t *pgd, unsigned long address, pgtbl_mod_mask *mod_mask) { - if (unlikely(pgd_none(*pgd))) { + if (unlikely(pgd_none(pgdp_get(pgd)))) { if (__p4d_alloc(mm, pgd, address)) return NULL; *mod_mask |= PGTBL_PGD_MODIFIED; @@ -20,7 +20,7 @@ static inline pud_t *pud_alloc_track(struct mm_struct *mm, p4d_t *p4d, unsigned long address, pgtbl_mod_mask *mod_mask) { - if (unlikely(p4d_none(*p4d))) { + if (unlikely(p4d_none(p4dp_get(p4d)))) { if (__pud_alloc(mm, p4d, address)) return NULL; *mod_mask |= PGTBL_P4D_MODIFIED; @@ -33,7 +33,7 @@ static inline pmd_t *pmd_alloc_track(struct mm_struct *mm, pud_t *pud, unsigned long address, pgtbl_mod_mask *mod_mask) { - if (unlikely(pud_none(*pud))) { + if (unlikely(pud_none(pudp_get(pud)))) { if (__pmd_alloc(mm, pud, address)) return NULL; *mod_mask |= PGTBL_PUD_MODIFIED; @@ -44,7 +44,7 @@ static inline pmd_t *pmd_alloc_track(struct mm_struct *mm, pud_t *pud, #endif /* CONFIG_MMU */ #define pte_alloc_kernel_track(pmd, address, mask) \ - ((unlikely(pmd_none(*(pmd))) && \ + ((unlikely(pmd_none(pmdp_get(pmd))) && \ (__pte_alloc_kernel(pmd) || 
({*(mask)|=PGTBL_PMD_MODIFIED;0;})))?\ NULL: pte_offset_kernel(pmd, address)) diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c index 567e2d084071..63a573306bfa 100644 --- a/mm/pgtable-generic.c +++ b/mm/pgtable-generic.c @@ -24,14 +24,14 @@ void pgd_clear_bad(pgd_t *pgd) { - pgd_ERROR(*pgd); + pgd_ERROR(pgdp_get(pgd)); pgd_clear(pgd); } #ifndef __PAGETABLE_P4D_FOLDED void p4d_clear_bad(p4d_t *p4d) { - p4d_ERROR(*p4d); + p4d_ERROR(p4dp_get(p4d)); p4d_clear(p4d); } #endif @@ -39,7 +39,7 @@ void p4d_clear_bad(p4d_t *p4d) #ifndef __PAGETABLE_PUD_FOLDED void pud_clear_bad(pud_t *pud) { - pud_ERROR(*pud); + pud_ERROR(pudp_get(pud)); pud_clear(pud); } #endif @@ -51,7 +51,7 @@ void pud_clear_bad(pud_t *pud) */ void pmd_clear_bad(pmd_t *pmd) { - pmd_ERROR(*pmd); + pmd_ERROR(pmdp_get(pmd)); pmd_clear(pmd); } @@ -110,7 +110,7 @@ int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp, pmd_t entry, int dirty) { - int changed = !pmd_same(*pmdp, entry); + int changed = !pmd_same(pmdp_get(pmdp), entry); VM_BUG_ON(address & ~HPAGE_PMD_MASK); if (changed) { set_pmd_at(vma->vm_mm, address, pmdp, entry); @@ -139,7 +139,7 @@ pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma, unsigned long address, { pmd_t pmd; VM_BUG_ON(address & ~HPAGE_PMD_MASK); - VM_BUG_ON(pmd_present(*pmdp) && !pmd_trans_huge(*pmdp)); + VM_BUG_ON(pmd_present(pmdp_get(pmdp)) && !pmd_trans_huge(pmdp_get(pmdp))); pmd = pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp); flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE); return pmd; @@ -152,7 +152,7 @@ pud_t pudp_huge_clear_flush(struct vm_area_struct *vma, unsigned long address, pud_t pud; VM_BUG_ON(address & ~HPAGE_PUD_MASK); - VM_BUG_ON(!pud_trans_huge(*pudp)); + VM_BUG_ON(!pud_trans_huge(pudp_get(pudp))); pud = pudp_huge_get_and_clear(vma->vm_mm, address, pudp); flush_pud_tlb_range(vma, address, address + HPAGE_PUD_SIZE); return pud; @@ -197,8 +197,9 @@ pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp) pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { - VM_WARN_ON_ONCE(!pmd_present(*pmdp)); - pmd_t old = pmdp_establish(vma, address, pmdp, pmd_mkinvalid(*pmdp)); + VM_WARN_ON_ONCE(!pmd_present(pmdp_get(pmdp))); + pmd_t old = pmdp_establish(vma, address, pmdp, + pmd_mkinvalid(pmdp_get(pmdp))); flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE); return old; } @@ -208,7 +209,7 @@ pmd_t pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, pmd_t pmdp_invalidate_ad(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) { - VM_WARN_ON_ONCE(!pmd_present(*pmdp)); + VM_WARN_ON_ONCE(!pmd_present(pmdp_get(pmdp))); return pmdp_invalidate(vma, address, pmdp); } #endif @@ -224,7 +225,7 @@ pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, unsigned long address, pmd_t pmd; VM_BUG_ON(address & ~HPAGE_PMD_MASK); - VM_BUG_ON(pmd_trans_huge(*pmdp)); + VM_BUG_ON(pmd_trans_huge(pmdp_get(pmdp))); pmd = pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp); /* collapse entails shooting down ptes not pmd */ diff --git a/mm/rmap.c b/mm/rmap.c index ac4f783d6ec2..aafefc1d7955 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -819,15 +819,15 @@ pmd_t *mm_find_pmd(struct mm_struct *mm, unsigned long address) pmd_t *pmd = NULL; pgd = pgd_offset(mm, address); - if (!pgd_present(*pgd)) + if (!pgd_present(pgdp_get(pgd))) goto out; p4d = p4d_offset(pgd, address); - if (!p4d_present(*p4d)) + if (!p4d_present(p4dp_get(p4d))) goto out; pud = pud_offset(p4d, address); - if 
(!pud_present(*pud)) + if (!pud_present(pudp_get(pud))) goto out; pmd = pmd_offset(pud, address); @@ -1048,7 +1048,7 @@ static int page_vma_mkclean_one(struct page_vma_mapped_walk *pvmw) pmd_t *pmd = pvmw->pmd; pmd_t entry; - if (!pmd_dirty(*pmd) && !pmd_write(*pmd)) + if (!pmd_dirty(pmdp_get(pmd)) && !pmd_write(pmdp_get(pmd))) continue; flush_cache_range(vma, address, diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c index 37522d6cb398..be065c57611d 100644 --- a/mm/sparse-vmemmap.c +++ b/mm/sparse-vmemmap.c @@ -198,7 +198,7 @@ static void * __meminit vmemmap_alloc_block_zero(unsigned long size, int node) pmd_t * __meminit vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node) { pmd_t *pmd = pmd_offset(pud, addr); - if (pmd_none(*pmd)) { + if (pmd_none(pmdp_get(pmd))) { void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node); if (!p) return NULL; @@ -211,7 +211,7 @@ pmd_t * __meminit vmemmap_pmd_populate(pud_t *pud, unsigned long addr, int node) pud_t * __meminit vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node) { pud_t *pud = pud_offset(p4d, addr); - if (pud_none(*pud)) { + if (pud_none(pudp_get(pud))) { void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node); if (!p) return NULL; @@ -224,7 +224,7 @@ pud_t * __meminit vmemmap_pud_populate(p4d_t *p4d, unsigned long addr, int node) p4d_t * __meminit vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node) { p4d_t *p4d = p4d_offset(pgd, addr); - if (p4d_none(*p4d)) { + if (p4d_none(p4dp_get(p4d))) { void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node); if (!p) return NULL; @@ -237,7 +237,7 @@ p4d_t * __meminit vmemmap_p4d_populate(pgd_t *pgd, unsigned long addr, int node) pgd_t * __meminit vmemmap_pgd_populate(unsigned long addr, int node) { pgd_t *pgd = pgd_offset_k(addr); - if (pgd_none(*pgd)) { + if (pgd_none(pgdp_get(pgd))) { void *p = vmemmap_alloc_block_zero(PAGE_SIZE, node); if (!p) return NULL; diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index af61b95c89e4..931c26914ef5 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -1306,8 +1306,8 @@ static long move_pages_ptes(struct mm_struct *mm, pmd_t *dst_pmd, pmd_t *src_pmd } /* Sanity checks before the operation */ - if (pmd_none(*dst_pmd) || pmd_none(*src_pmd) || - pmd_trans_huge(*dst_pmd) || pmd_trans_huge(*src_pmd)) { + if (pmd_none(pmdp_get(dst_pmd)) || pmd_none(pmdp_get(src_pmd)) || + pmd_trans_huge(pmdp_get(dst_pmd)) || pmd_trans_huge(pmdp_get(src_pmd))) { ret = -EINVAL; goto out; } @@ -1897,8 +1897,8 @@ ssize_t move_pages(struct userfaultfd_ctx *ctx, unsigned long dst_start, if (move_splits_huge_pmd(dst_addr, src_addr, src_start + len) || !pmd_none(dst_pmdval)) { /* Can be a migration entry */ - if (pmd_present(*src_pmd)) { - struct folio *folio = pmd_folio(*src_pmd); + if (pmd_present(pmdp_get(src_pmd))) { + struct folio *folio = pmd_folio(pmdp_get(src_pmd)); if (!is_huge_zero_folio(folio) && !PageAnonExclusive(&folio->page)) { @@ -1921,7 +1921,7 @@ ssize_t move_pages(struct userfaultfd_ctx *ctx, unsigned long dst_start, } else { long ret; - if (pmd_none(*src_pmd)) { + if (pmd_none(pmdp_get(src_pmd))) { if (!(mode & UFFDIO_MOVE_MODE_ALLOW_SRC_HOLES)) { err = -ENOENT; break; diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 798b2ed21e46..7bafe94d501f 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -155,7 +155,7 @@ static int vmap_try_huge_pmd(pmd_t *pmd, unsigned long addr, unsigned long end, if (!IS_ALIGNED(phys_addr, PMD_SIZE)) return 0; - if (pmd_present(*pmd) && !pmd_free_pte_page(pmd, addr)) + if (pmd_present(pmdp_get(pmd)) && 
!pmd_free_pte_page(pmd, addr)) return 0; return pmd_set_huge(pmd, phys_addr, prot); @@ -205,7 +205,7 @@ static int vmap_try_huge_pud(pud_t *pud, unsigned long addr, unsigned long end, if (!IS_ALIGNED(phys_addr, PUD_SIZE)) return 0; - if (pud_present(*pud) && !pud_free_pmd_page(pud, addr)) + if (pud_present(pudp_get(pud)) && !pud_free_pmd_page(pud, addr)) return 0; return pud_set_huge(pud, phys_addr, prot); @@ -256,7 +256,7 @@ static int vmap_try_huge_p4d(p4d_t *p4d, unsigned long addr, unsigned long end, if (!IS_ALIGNED(phys_addr, P4D_SIZE)) return 0; - if (p4d_present(*p4d) && !p4d_free_pud_page(p4d, addr)) + if (p4d_present(p4dp_get(p4d)) && !p4d_free_pud_page(p4d, addr)) return 0; return p4d_set_huge(p4d, phys_addr, prot); @@ -367,7 +367,8 @@ static void vunmap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, if (size != PAGE_SIZE) { if (WARN_ON(!IS_ALIGNED(addr, size))) { addr = ALIGN_DOWN(addr, size); - pte = PTR_ALIGN_DOWN(pte, sizeof(*pte) * (size >> PAGE_SHIFT)); + pte = PTR_ALIGN_DOWN(pte, + sizeof(ptep_get(pte)) * (size >> PAGE_SHIFT)); } ptent = huge_ptep_get_and_clear(&init_mm, addr, pte, size); if (WARN_ON(end - addr < size)) @@ -394,7 +395,7 @@ static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end, next = pmd_addr_end(addr, end); cleared = pmd_clear_huge(pmd); - if (cleared || pmd_bad(*pmd)) + if (cleared || pmd_bad(pmdp_get(pmd))) *mask |= PGTBL_PMD_MODIFIED; if (cleared) { @@ -421,7 +422,7 @@ static void vunmap_pud_range(p4d_t *p4d, unsigned long addr, unsigned long end, next = pud_addr_end(addr, end); cleared = pud_clear_huge(pud); - if (cleared || pud_bad(*pud)) + if (cleared || pud_bad(pudp_get(pud))) *mask |= PGTBL_PUD_MODIFIED; if (cleared) { @@ -445,7 +446,7 @@ static void vunmap_p4d_range(pgd_t *pgd, unsigned long addr, unsigned long end, next = p4d_addr_end(addr, end); p4d_clear_huge(p4d); - if (p4d_bad(*p4d)) + if (p4d_bad(p4dp_get(p4d))) *mask |= PGTBL_P4D_MODIFIED; if (p4d_none_or_clear_bad(p4d)) @@ -477,7 +478,7 @@ void __vunmap_range_noflush(unsigned long start, unsigned long end) pgd = pgd_offset_k(addr); do { next = pgd_addr_end(addr, end); - if (pgd_bad(*pgd)) + if (pgd_bad(pgdp_get(pgd))) mask |= PGTBL_PGD_MODIFIED; if (pgd_none_or_clear_bad(pgd)) continue; @@ -622,7 +623,7 @@ static int vmap_small_pages_range_noflush(unsigned long addr, unsigned long end, pgd = pgd_offset_k(addr); do { next = pgd_addr_end(addr, end); - if (pgd_bad(*pgd)) + if (pgd_bad(pgdp_get(pgd))) mask |= PGTBL_PGD_MODIFIED; err = vmap_pages_p4d_range(pgd, addr, next, prot, pages, &nr, &mask); if (err) @@ -792,35 +793,35 @@ struct page *vmalloc_to_page(const void *vmalloc_addr) */ VIRTUAL_BUG_ON(!is_vmalloc_or_module_addr(vmalloc_addr)); - if (pgd_none(*pgd)) + if (pgd_none(pgdp_get(pgd))) return NULL; - if (WARN_ON_ONCE(pgd_leaf(*pgd))) + if (WARN_ON_ONCE(pgd_leaf(pgdp_get(pgd)))) return NULL; /* XXX: no allowance for huge pgd */ - if (WARN_ON_ONCE(pgd_bad(*pgd))) + if (WARN_ON_ONCE(pgd_bad(pgdp_get(pgd)))) return NULL; p4d = p4d_offset(pgd, addr); - if (p4d_none(*p4d)) + if (p4d_none(p4dp_get(p4d))) return NULL; - if (p4d_leaf(*p4d)) - return p4d_page(*p4d) + ((addr & ~P4D_MASK) >> PAGE_SHIFT); - if (WARN_ON_ONCE(p4d_bad(*p4d))) + if (p4d_leaf(p4dp_get(p4d))) + return p4d_page(p4dp_get(p4d)) + ((addr & ~P4D_MASK) >> PAGE_SHIFT); + if (WARN_ON_ONCE(p4d_bad(p4dp_get(p4d)))) return NULL; pud = pud_offset(p4d, addr); - if (pud_none(*pud)) + if (pud_none(pudp_get(pud))) return NULL; - if (pud_leaf(*pud)) - return pud_page(*pud) + ((addr & ~PUD_MASK) >> 
PAGE_SHIFT); - if (WARN_ON_ONCE(pud_bad(*pud))) + if (pud_leaf(pudp_get(pud))) + return pud_page(pudp_get(pud)) + ((addr & ~PUD_MASK) >> PAGE_SHIFT); + if (WARN_ON_ONCE(pud_bad(pudp_get(pud)))) return NULL; pmd = pmd_offset(pud, addr); - if (pmd_none(*pmd)) + if (pmd_none(pmdp_get(pmd))) return NULL; - if (pmd_leaf(*pmd)) - return pmd_page(*pmd) + ((addr & ~PMD_MASK) >> PAGE_SHIFT); - if (WARN_ON_ONCE(pmd_bad(*pmd))) + if (pmd_leaf(pmdp_get(pmd))) + return pmd_page(pmdp_get(pmd)) + ((addr & ~PMD_MASK) >> PAGE_SHIFT); + if (WARN_ON_ONCE(pmd_bad(pmdp_get(pmd)))) return NULL; ptep = pte_offset_kernel(pmd, addr); diff --git a/mm/vmscan.c b/mm/vmscan.c index 2239de111fa6..4401d20548e0 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -3612,7 +3612,7 @@ static void walk_pmd_range_locked(pud_t *pud, unsigned long addr, struct vm_area DEFINE_MAX_SEQ(walk->lruvec); int gen = lru_gen_from_seq(max_seq); - VM_WARN_ON_ONCE(pud_leaf(*pud)); + VM_WARN_ON_ONCE(pud_leaf(pudp_get(pud))); /* try to batch at most 1+MIN_LRU_BATCH+1 entries */ if (*first == -1) { @@ -3642,17 +3642,17 @@ static void walk_pmd_range_locked(pud_t *pud, unsigned long addr, struct vm_area /* don't round down the first address */ addr = i ? (*first & PMD_MASK) + i * PMD_SIZE : *first; - if (!pmd_present(pmd[i])) + if (!pmd_present(pmdp_get(pmd + i))) goto next; - if (!pmd_trans_huge(pmd[i])) { + if (!pmd_trans_huge(pmdp_get(pmd + i))) { if (!walk->force_scan && should_clear_pmd_young() && !mm_has_notifiers(args->mm)) pmdp_test_and_clear_young(vma, addr, pmd + i); goto next; } - pfn = get_pmd_pfn(pmd[i], vma, addr, pgdat); + pfn = get_pmd_pfn(pmdp_get(pmd + i), vma, addr, pgdat); if (pfn == -1) goto next; @@ -3670,7 +3670,7 @@ static void walk_pmd_range_locked(pud_t *pud, unsigned long addr, struct vm_area dirty = false; } - if (pmd_dirty(pmd[i])) + if (pmd_dirty(pmdp_get(pmd + i))) dirty = true; walk->mm_stats[MM_LEAF_YOUNG]++; @@ -3699,7 +3699,7 @@ static void walk_pmd_range(pud_t *pud, unsigned long start, unsigned long end, struct lru_gen_mm_walk *walk = args->private; struct lru_gen_mm_state *mm_state = get_mm_state(walk->lruvec); - VM_WARN_ON_ONCE(pud_leaf(*pud)); + VM_WARN_ON_ONCE(pud_leaf(pudp_get(pud))); /* * Finish an entire PMD in two passes: the first only reaches to PTE @@ -3768,7 +3768,7 @@ static int walk_pud_range(p4d_t *p4d, unsigned long start, unsigned long end, unsigned long next; struct lru_gen_mm_walk *walk = args->private; - VM_WARN_ON_ONCE(p4d_leaf(*p4d)); + VM_WARN_ON_ONCE(p4d_leaf(p4dp_get(p4d))); pud = pud_offset(p4d, start & P4D_MASK); restart: -- 2.47.2 Architectures may have special rules for accessing the hardware page tables (for example, atomicity/ordering requirements), so the generic MM code provides the pXXp_get() and set_pXX() hooks for architectures to implement. These accessor functions are often omitted where a raw pointer dereference is believed to be safe (i.e. race-free). However, RISC-V needs to use these hooks to rewrite the page table values at read/write time on some platforms. A raw pointer dereference will no longer produce the correct value on those platforms, so the generic code must always use the accessor functions. sparse can only report improper pointer dereferences if every page table pointer (variable, function argument, struct member) is individually marked with an attribute (similar to __user). So while this is possible, it would require invasive changes across all architectures. 
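To make the distinction concrete, here is a minimal sketch of the two access styles (illustration only, not part of any patch in this series; the two wrapper functions are hypothetical). The raw dereference is the pattern the checkpatch rule below is meant to flag; the accessor lets the architecture rewrite the value on the way out:

#include <linux/pgtable.h>

/* Hypothetical helpers, for illustration only. */
static bool pmd_populated_raw(pmd_t *pmdp)
{
	return !pmd_none(*pmdp);		/* raw dereference: bypasses any transformation */
}

static bool pmd_populated_accessor(pmd_t *pmdp)
{
	return !pmd_none(pmdp_get(pmdp));	/* accessor: architecture may rewrite the value */
}
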
Instead, as an immediate first solution, add a checkpatch warning that will generally catch the prohibited pointer dereferences. Architecture code is ignored, as the raw dereferences may be safe on some architectures. Signed-off-by: Samuel Holland --- Changes in v3: - New patch for v3 scripts/checkpatch.pl | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl index 92669904eecc..55984d7361ea 100755 --- a/scripts/checkpatch.pl +++ b/scripts/checkpatch.pl @@ -7721,6 +7721,13 @@ sub process { ERROR("MISSING_SENTINEL", "missing sentinel in ID array\n" . "$here\n$stat\n"); } } + +# check for raw dereferences of hardware page table pointers + if ($realfile !~ m@^arch/@ && + $line =~ /(?))?(pte|p[mu4g]d)p?\b/) { + WARN("PAGE_TABLE_ACCESSORS", + "Use $3p_get()/set_$3() instead of dereferencing page table pointers\n" . $herecurr); + } } # If we have no input at all, then there is nothing to report on -- 2.47.2 Currently, some functions such as pte_offset_map() are passed both pointers to hardware page tables, and pointers to previously-read PMD entries on the stack. To ensure correctness in the first case, these functions must use the page table accessor function (pmdp_get()) to dereference the supplied pointer. However, this means pmdp_get() is called twice in the second case. This double call must be avoided if pmdp_get() applies some non-idempotent transformation to the value. Avoid the double transformation by calling set_pmd() on the stack variables where necessary to keep set_pmd()/pmdp_get() calls balanced. Signed-off-by: Samuel Holland --- (no changes since v2) Changes in v2: - New patch for v2 kernel/events/core.c | 2 ++ mm/gup.c | 3 +++ mm/khugepaged.c | 6 ++++-- mm/page_table_check.c | 3 +++ mm/pgtable-generic.c | 2 ++ 5 files changed, 14 insertions(+), 2 deletions(-) diff --git a/kernel/events/core.c b/kernel/events/core.c index fa4f9165bd94..7969b060bf2d 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -8154,6 +8154,8 @@ static u64 perf_get_pgtable_size(struct mm_struct *mm, unsigned long addr) if (pmd_leaf(pmd)) return pmd_leaf_size(pmd); + /* transform pmd as if &pmd pointed to a hardware page table */ + set_pmd(&pmd, pmd); ptep = pte_offset_map(&pmd, addr); if (!ptep) goto again; diff --git a/mm/gup.c b/mm/gup.c index 549f9e868311..aba61704049e 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -2844,7 +2844,10 @@ static int gup_fast_pte_range(pmd_t pmd, pmd_t *pmdp, unsigned long addr, int ret = 0; pte_t *ptep, *ptem; + /* transform pmd as if &pmd pointed to a hardware page table */ + set_pmd(&pmd, pmd); ptem = ptep = pte_offset_map(&pmd, addr); + pmd = pmdp_get(&pmd); if (!ptep) return 0; do { diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 1bff8ade751a..ab1f68a7bc83 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -1724,7 +1724,7 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff) struct mmu_notifier_range range; struct mm_struct *mm; unsigned long addr; - pmd_t *pmd, pgt_pmd; + pmd_t *pmd, pgt_pmd, pmdval; spinlock_t *pml; spinlock_t *ptl; bool success = false; @@ -1777,7 +1777,9 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff) */ if (check_pmd_state(pmd) != SCAN_SUCCEED) goto drop_pml; - ptl = pte_lockptr(mm, pmd); + /* pte_lockptr() needs a value, not a pointer to a page table */ + pmdval = pmdp_get(pmd); + ptl = pte_lockptr(mm, &pmdval); if (ptl != pml) spin_lock_nested(ptl, SINGLE_DEPTH_NESTING); diff --git a/mm/page_table_check.c 
b/mm/page_table_check.c index 31f4c39d20ef..77d6688db0de 100644 --- a/mm/page_table_check.c +++ b/mm/page_table_check.c @@ -260,7 +260,10 @@ void __page_table_check_pte_clear_range(struct mm_struct *mm, return; if (!pmd_bad(pmd) && !pmd_leaf(pmd)) { + /* transform pmd as if &pmd pointed to a hardware page table */ + set_pmd(&pmd, pmd); pte_t *ptep = pte_offset_map(&pmd, addr); + pmd = pmdp_get(&pmd); unsigned long i; if (WARN_ON(!ptep)) diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c index 63a573306bfa..6602deb002f1 100644 --- a/mm/pgtable-generic.c +++ b/mm/pgtable-generic.c @@ -299,6 +299,8 @@ pte_t *___pte_offset_map(pmd_t *pmd, unsigned long addr, pmd_t *pmdvalp) pmd_clear_bad(pmd); goto nomap; } + /* transform pmdval as if &pmdval pointed to a hardware page table */ + set_pmd(&pmdval, pmdval); return __pte_map(&pmdval, addr); nomap: rcu_read_unlock(); -- 2.47.2 Use the semantically appropriate accessor function instead of open coding the implementation. This will become important once these functions start transforming the PTE value on some platforms. Signed-off-by: Samuel Holland --- (no changes since v2) Changes in v2: - New patch for v2 arch/riscv/kernel/hibernate.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/arch/riscv/kernel/hibernate.c b/arch/riscv/kernel/hibernate.c index 671b686c0158..2a9bc9d9e776 100644 --- a/arch/riscv/kernel/hibernate.c +++ b/arch/riscv/kernel/hibernate.c @@ -171,7 +171,7 @@ static int temp_pgtable_map_pte(pmd_t *dst_pmdp, pmd_t *src_pmdp, unsigned long pte_t *src_ptep; pte_t *dst_ptep; - if (pmd_none(READ_ONCE(*dst_pmdp))) { + if (pmd_none(pmdp_get(dst_pmdp))) { dst_ptep = (pte_t *)get_safe_page(GFP_ATOMIC); if (!dst_ptep) return -ENOMEM; @@ -183,7 +183,7 @@ static int temp_pgtable_map_pte(pmd_t *dst_pmdp, pmd_t *src_pmdp, unsigned long src_ptep = pte_offset_kernel(src_pmdp, start); do { - pte_t pte = READ_ONCE(*src_ptep); + pte_t pte = ptep_get(src_ptep); if (pte_present(pte)) set_pte(dst_ptep, __pte(pte_val(pte) | pgprot_val(prot))); @@ -200,7 +200,7 @@ static int temp_pgtable_map_pmd(pud_t *dst_pudp, pud_t *src_pudp, unsigned long pmd_t *src_pmdp; pmd_t *dst_pmdp; - if (pud_none(READ_ONCE(*dst_pudp))) { + if (pud_none(pudp_get(dst_pudp))) { dst_pmdp = (pmd_t *)get_safe_page(GFP_ATOMIC); if (!dst_pmdp) return -ENOMEM; @@ -212,7 +212,7 @@ static int temp_pgtable_map_pmd(pud_t *dst_pudp, pud_t *src_pudp, unsigned long src_pmdp = pmd_offset(src_pudp, start); do { - pmd_t pmd = READ_ONCE(*src_pmdp); + pmd_t pmd = pmdp_get(src_pmdp); next = pmd_addr_end(start, end); @@ -239,7 +239,7 @@ static int temp_pgtable_map_pud(p4d_t *dst_p4dp, p4d_t *src_p4dp, unsigned long pud_t *dst_pudp; pud_t *src_pudp; - if (p4d_none(READ_ONCE(*dst_p4dp))) { + if (p4d_none(p4dp_get(dst_p4dp))) { dst_pudp = (pud_t *)get_safe_page(GFP_ATOMIC); if (!dst_pudp) return -ENOMEM; @@ -251,7 +251,7 @@ static int temp_pgtable_map_pud(p4d_t *dst_p4dp, p4d_t *src_p4dp, unsigned long src_pudp = pud_offset(src_p4dp, start); do { - pud_t pud = READ_ONCE(*src_pudp); + pud_t pud = pudp_get(src_pudp); next = pud_addr_end(start, end); @@ -278,7 +278,7 @@ static int temp_pgtable_map_p4d(pgd_t *dst_pgdp, pgd_t *src_pgdp, unsigned long p4d_t *dst_p4dp; p4d_t *src_p4dp; - if (pgd_none(READ_ONCE(*dst_pgdp))) { + if (pgd_none(pgdp_get(dst_pgdp))) { dst_p4dp = (p4d_t *)get_safe_page(GFP_ATOMIC); if (!dst_p4dp) return -ENOMEM; @@ -290,7 +290,7 @@ static int temp_pgtable_map_p4d(pgd_t *dst_pgdp, pgd_t *src_pgdp, unsigned long src_p4dp = p4d_offset(src_pgdp, 
start); do { - p4d_t p4d = READ_ONCE(*src_p4dp); + p4d_t p4d = p4dp_get(src_p4dp); next = p4d_addr_end(start, end); @@ -317,7 +317,7 @@ static int temp_pgtable_mapping(pgd_t *pgdp, unsigned long start, unsigned long unsigned long ret; do { - pgd_t pgd = READ_ONCE(*src_pgdp); + pgd_t pgd = pgdp_get(src_pgdp); next = pgd_addr_end(start, end); -- 2.47.2 Use the semantically appropriate accessor function instead of a raw pointer dereference. This will become important once these functions start transforming the PTE value on some platforms. Signed-off-by: Samuel Holland --- (no changes since v2) Changes in v2: - New patch for v2 arch/riscv/include/asm/pgtable.h | 8 ++-- arch/riscv/kvm/gstage.c | 6 +-- arch/riscv/mm/init.c | 68 +++++++++++++++++--------------- arch/riscv/mm/pgtable.c | 9 +++-- 4 files changed, 49 insertions(+), 42 deletions(-) diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 5a08eb5fe99f..acfd48f92010 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -952,7 +952,7 @@ static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm, #ifdef CONFIG_SMP pud_t pud = __pud(xchg(&pudp->pud, 0)); #else - pud_t pud = *pudp; + pud_t pud = pudp_get(pudp); pud_clear(pudp); #endif @@ -1129,13 +1129,15 @@ extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]; */ #define set_p4d_safe(p4dp, p4d) \ ({ \ - WARN_ON_ONCE(p4d_present(*p4dp) && !p4d_same(*p4dp, p4d)); \ + p4d_t old = p4dp_get(p4dp); \ + WARN_ON_ONCE(p4d_present(old) && !p4d_same(old, p4d)); \ set_p4d(p4dp, p4d); \ }) #define set_pgd_safe(pgdp, pgd) \ ({ \ - WARN_ON_ONCE(pgd_present(*pgdp) && !pgd_same(*pgdp, pgd)); \ + pgd_t old = pgdp_get(pgdp); \ + WARN_ON_ONCE(pgd_present(old) && !pgd_same(old, pgd)); \ set_pgd(pgdp, pgd); \ }) #endif /* !__ASSEMBLER__ */ diff --git a/arch/riscv/kvm/gstage.c b/arch/riscv/kvm/gstage.c index b67d60d722c2..297744e2ab5d 100644 --- a/arch/riscv/kvm/gstage.c +++ b/arch/riscv/kvm/gstage.c @@ -154,7 +154,7 @@ int kvm_riscv_gstage_set_pte(struct kvm_gstage *gstage, ptep = &next_ptep[gstage_pte_index(map->addr, current_level)]; } - if (pte_val(*ptep) != pte_val(map->pte)) { + if (pte_val(ptep_get(ptep)) != pte_val(map->pte)) { set_pte(ptep, map->pte); if (gstage_pte_leaf(ptep)) gstage_tlb_flush(gstage, current_level, map->addr); @@ -241,12 +241,12 @@ void kvm_riscv_gstage_op_pte(struct kvm_gstage *gstage, gpa_t addr, if (op == GSTAGE_OP_CLEAR) put_page(virt_to_page(next_ptep)); } else { - old_pte = *ptep; + old_pte = ptep_get(ptep); if (op == GSTAGE_OP_CLEAR) set_pte(ptep, __pte(0)); else if (op == GSTAGE_OP_WP) set_pte(ptep, __pte(pte_val(ptep_get(ptep)) & ~_PAGE_WRITE)); - if (pte_val(*ptep) != pte_val(old_pte)) + if (pte_val(ptep_get(ptep)) != pte_val(old_pte)) gstage_tlb_flush(gstage, ptep_level, addr); } } diff --git a/arch/riscv/mm/init.c b/arch/riscv/mm/init.c index d85efe74a4b6..ac686c1b2f85 100644 --- a/arch/riscv/mm/init.c +++ b/arch/riscv/mm/init.c @@ -459,8 +459,8 @@ static void __meminit create_pte_mapping(pte_t *ptep, uintptr_t va, phys_addr_t BUG_ON(sz != PAGE_SIZE); - if (pte_none(ptep[pte_idx])) - ptep[pte_idx] = pfn_pte(PFN_DOWN(pa), prot); + if (pte_none(ptep_get(ptep + pte_idx))) + set_pte(ptep + pte_idx, pfn_pte(PFN_DOWN(pa), prot)); } #ifndef __PAGETABLE_PMD_FOLDED @@ -542,18 +542,19 @@ static void __meminit create_pmd_mapping(pmd_t *pmdp, uintptr_t pmd_idx = pmd_index(va); if (sz == PMD_SIZE) { - if (pmd_none(pmdp[pmd_idx])) - pmdp[pmd_idx] = pfn_pmd(PFN_DOWN(pa), prot); + if 
(pmd_none(pmdp_get(pmdp + pmd_idx))) + set_pmd(pmdp + pmd_idx, pfn_pmd(PFN_DOWN(pa), prot)); return; } - if (pmd_none(pmdp[pmd_idx])) { + if (pmd_none(pmdp_get(pmdp + pmd_idx))) { pte_phys = pt_ops.alloc_pte(va); - pmdp[pmd_idx] = pfn_pmd(PFN_DOWN(pte_phys), PAGE_TABLE); + set_pmd(pmdp + pmd_idx, + pfn_pmd(PFN_DOWN(pte_phys), PAGE_TABLE)); ptep = pt_ops.get_pte_virt(pte_phys); memset(ptep, 0, PAGE_SIZE); } else { - pte_phys = PFN_PHYS(_pmd_pfn(pmdp[pmd_idx])); + pte_phys = PFN_PHYS(_pmd_pfn(pmdp_get(pmdp + pmd_idx))); ptep = pt_ops.get_pte_virt(pte_phys); } @@ -644,18 +645,19 @@ static void __meminit create_pud_mapping(pud_t *pudp, uintptr_t va, phys_addr_t uintptr_t pud_index = pud_index(va); if (sz == PUD_SIZE) { - if (pud_val(pudp[pud_index]) == 0) - pudp[pud_index] = pfn_pud(PFN_DOWN(pa), prot); + if (pud_val(pudp_get(pudp + pud_index)) == 0) + set_pud(pudp + pud_index, pfn_pud(PFN_DOWN(pa), prot)); return; } - if (pud_val(pudp[pud_index]) == 0) { + if (pud_val(pudp_get(pudp + pud_index)) == 0) { next_phys = pt_ops.alloc_pmd(va); - pudp[pud_index] = pfn_pud(PFN_DOWN(next_phys), PAGE_TABLE); + set_pud(pudp + pud_index, + pfn_pud(PFN_DOWN(next_phys), PAGE_TABLE)); nextp = pt_ops.get_pmd_virt(next_phys); memset(nextp, 0, PAGE_SIZE); } else { - next_phys = PFN_PHYS(_pud_pfn(pudp[pud_index])); + next_phys = PFN_PHYS(_pud_pfn(pudp_get(pudp + pud_index))); nextp = pt_ops.get_pmd_virt(next_phys); } @@ -670,18 +672,19 @@ static void __meminit create_p4d_mapping(p4d_t *p4dp, uintptr_t va, phys_addr_t uintptr_t p4d_index = p4d_index(va); if (sz == P4D_SIZE) { - if (p4d_val(p4dp[p4d_index]) == 0) - p4dp[p4d_index] = pfn_p4d(PFN_DOWN(pa), prot); + if (p4d_val(p4dp_get(p4dp + p4d_index)) == 0) + set_p4d(p4dp + p4d_index, pfn_p4d(PFN_DOWN(pa), prot)); return; } - if (p4d_val(p4dp[p4d_index]) == 0) { + if (p4d_val(p4dp_get(p4dp + p4d_index)) == 0) { next_phys = pt_ops.alloc_pud(va); - p4dp[p4d_index] = pfn_p4d(PFN_DOWN(next_phys), PAGE_TABLE); + set_p4d(p4dp + p4d_index, + pfn_p4d(PFN_DOWN(next_phys), PAGE_TABLE)); nextp = pt_ops.get_pud_virt(next_phys); memset(nextp, 0, PAGE_SIZE); } else { - next_phys = PFN_PHYS(_p4d_pfn(p4dp[p4d_index])); + next_phys = PFN_PHYS(_p4d_pfn(p4dp_get(p4dp + p4d_index))); nextp = pt_ops.get_pud_virt(next_phys); } @@ -727,18 +730,19 @@ void __meminit create_pgd_mapping(pgd_t *pgdp, uintptr_t va, phys_addr_t pa, phy uintptr_t pgd_idx = pgd_index(va); if (sz == PGDIR_SIZE) { - if (pgd_val(pgdp[pgd_idx]) == 0) - pgdp[pgd_idx] = pfn_pgd(PFN_DOWN(pa), prot); + if (pgd_val(pgdp_get(pgdp + pgd_idx)) == 0) + set_pgd(pgdp + pgd_idx, pfn_pgd(PFN_DOWN(pa), prot)); return; } - if (pgd_val(pgdp[pgd_idx]) == 0) { + if (pgd_val(pgdp_get(pgdp + pgd_idx)) == 0) { next_phys = alloc_pgd_next(va); - pgdp[pgd_idx] = pfn_pgd(PFN_DOWN(next_phys), PAGE_TABLE); + set_pgd(pgdp + pgd_idx, + pfn_pgd(PFN_DOWN(next_phys), PAGE_TABLE)); nextp = get_pgd_next_virt(next_phys); memset(nextp, 0, PAGE_SIZE); } else { - next_phys = PFN_PHYS(_pgd_pfn(pgdp[pgd_idx])); + next_phys = PFN_PHYS(_pgd_pfn(pgdp_get(pgdp + pgd_idx))); nextp = get_pgd_next_virt(next_phys); } @@ -1574,14 +1578,14 @@ struct execmem_info __init *execmem_arch_setup(void) #ifdef CONFIG_MEMORY_HOTPLUG static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd) { - struct page *page = pmd_page(*pmd); + struct page *page = pmd_page(pmdp_get(pmd)); struct ptdesc *ptdesc = page_ptdesc(page); pte_t *pte; int i; for (i = 0; i < PTRS_PER_PTE; i++) { pte = pte_start + i; - if (!pte_none(*pte)) + if (!pte_none(ptep_get(pte))) return; } @@ 
-1595,14 +1599,14 @@ static void __meminit free_pte_table(pte_t *pte_start, pmd_t *pmd) static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud, bool is_vmemmap) { - struct page *page = pud_page(*pud); + struct page *page = pud_page(pudp_get(pud)); struct ptdesc *ptdesc = page_ptdesc(page); pmd_t *pmd; int i; for (i = 0; i < PTRS_PER_PMD; i++) { pmd = pmd_start + i; - if (!pmd_none(*pmd)) + if (!pmd_none(pmdp_get(pmd))) return; } @@ -1617,13 +1621,13 @@ static void __meminit free_pmd_table(pmd_t *pmd_start, pud_t *pud, bool is_vmemm static void __meminit free_pud_table(pud_t *pud_start, p4d_t *p4d) { - struct page *page = p4d_page(*p4d); + struct page *page = p4d_page(p4dp_get(p4d)); pud_t *pud; int i; for (i = 0; i < PTRS_PER_PUD; i++) { pud = pud_start + i; - if (!pud_none(*pud)) + if (!pud_none(pudp_get(pud))) return; } @@ -1668,7 +1672,7 @@ static void __meminit remove_pte_mapping(pte_t *pte_base, unsigned long addr, un ptep = pte_base + pte_index(addr); pte = ptep_get(ptep); - if (!pte_present(*ptep)) + if (!pte_present(ptep_get(ptep))) continue; pte_clear(&init_mm, addr, ptep); @@ -1698,7 +1702,7 @@ static void __meminit remove_pmd_mapping(pmd_t *pmd_base, unsigned long addr, un continue; } - pte_base = (pte_t *)pmd_page_vaddr(*pmdp); + pte_base = (pte_t *)pmd_page_vaddr(pmdp_get(pmdp)); remove_pte_mapping(pte_base, addr, next, is_vmemmap, altmap); free_pte_table(pte_base, pmdp); } @@ -1777,10 +1781,10 @@ static void __meminit remove_pgd_mapping(unsigned long va, unsigned long end, bo next = pgd_addr_end(addr, end); pgd = pgd_offset_k(addr); - if (!pgd_present(*pgd)) + if (!pgd_present(pgdp_get(pgd))) continue; - if (pgd_leaf(*pgd)) + if (pgd_leaf(pgdp_get(pgd))) continue; p4d_base = p4d_offset(pgd, 0); diff --git a/arch/riscv/mm/pgtable.c b/arch/riscv/mm/pgtable.c index 8b6c0a112a8d..c4b85a828797 100644 --- a/arch/riscv/mm/pgtable.c +++ b/arch/riscv/mm/pgtable.c @@ -95,8 +95,8 @@ int pud_free_pmd_page(pud_t *pud, unsigned long addr) flush_tlb_kernel_range(addr, addr + PUD_SIZE); for (i = 0; i < PTRS_PER_PMD; i++) { - if (!pmd_none(pmd[i])) { - pte_t *pte = (pte_t *)pmd_page_vaddr(pmd[i]); + if (!pmd_none(pmdp_get(pmd + i))) { + pte_t *pte = (pte_t *)pmd_page_vaddr(pmdp_get(pmd + i)); pte_free_kernel(NULL, pte); } @@ -158,8 +158,9 @@ pmd_t pmdp_collapse_flush(struct vm_area_struct *vma, pud_t pudp_invalidate(struct vm_area_struct *vma, unsigned long address, pud_t *pudp) { - VM_WARN_ON_ONCE(!pud_present(*pudp)); - pud_t old = pudp_establish(vma, address, pudp, pud_mkinvalid(*pudp)); + VM_WARN_ON_ONCE(!pud_present(pudp_get(pudp))); + pud_t old = pudp_establish(vma, address, pudp, + pud_mkinvalid(pudp_get(pudp))); flush_pud_tlb_range(vma, address, address + HPAGE_PUD_SIZE); return old; -- 2.47.2 RISC-V uses the same page table entry format and has the same atomicity requirements at all page table levels, so these setter functions use the same underlying implementation at all levels. Checking the translation mode to pick between two identical branches only serves to make these functions less efficient. 
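For reference, a sketch (not from the patch) of what the old set_p4d() reduces to on each path, assuming set_pud() on RISC-V is itself a plain WRITE_ONCE() as in the current pgtable-64.h; both branches perform the same store, so the pgtable_l4_enabled check is pure overhead:

static inline void old_set_p4d(p4d_t *p4dp, p4d_t p4d)
{
	if (pgtable_l4_enabled)
		WRITE_ONCE(*p4dp, p4d);
	else
		/* set_pud((pud_t *)p4dp, (pud_t){ p4d_val(p4d) }) expands to: */
		WRITE_ONCE(*(pud_t *)p4dp, (pud_t){ p4d_val(p4d) });
}
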
Signed-off-by: Samuel Holland --- (no changes since v2) Changes in v2: - New patch for v2 arch/riscv/include/asm/pgtable-64.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h index 6e789fa58514..5532f8515450 100644 --- a/arch/riscv/include/asm/pgtable-64.h +++ b/arch/riscv/include/asm/pgtable-64.h @@ -275,10 +275,7 @@ static inline unsigned long _pmd_pfn(pmd_t pmd) static inline void set_p4d(p4d_t *p4dp, p4d_t p4d) { - if (pgtable_l4_enabled) - WRITE_ONCE(*p4dp, p4d); - else - set_pud((pud_t *)p4dp, (pud_t){ p4d_val(p4d) }); + WRITE_ONCE(*p4dp, p4d); } static inline int p4d_none(p4d_t p4d) @@ -342,10 +339,7 @@ pud_t *pud_offset(p4d_t *p4d, unsigned long address); static inline void set_pgd(pgd_t *pgdp, pgd_t pgd) { - if (pgtable_l5_enabled) - WRITE_ONCE(*pgdp, pgd); - else - set_p4d((p4d_t *)pgdp, (p4d_t){ pgd_val(pgd) }); + WRITE_ONCE(*pgdp, pgd); } static inline int pgd_none(pgd_t pgd) -- 2.47.2 The two existing definitions are equivalent because _PAGE_MTMASK is defined as 0 on riscv32. Reviewed-by: Alexandre Ghiti Signed-off-by: Samuel Holland --- (no changes since v1) arch/riscv/include/asm/pgtable-32.h | 5 ----- arch/riscv/include/asm/pgtable-64.h | 7 ------- arch/riscv/include/asm/pgtable.h | 6 ++++++ 3 files changed, 6 insertions(+), 12 deletions(-) diff --git a/arch/riscv/include/asm/pgtable-32.h b/arch/riscv/include/asm/pgtable-32.h index 00f3369570a8..fa6c87015c48 100644 --- a/arch/riscv/include/asm/pgtable-32.h +++ b/arch/riscv/include/asm/pgtable-32.h @@ -28,11 +28,6 @@ #define _PAGE_IO 0 #define _PAGE_MTMASK 0 -/* Set of bits to preserve across pte_modify() */ -#define _PAGE_CHG_MASK (~(unsigned long)(_PAGE_PRESENT | _PAGE_READ | \ - _PAGE_WRITE | _PAGE_EXEC | \ - _PAGE_USER | _PAGE_GLOBAL)) - static const __maybe_unused int pgtable_l4_enabled; static const __maybe_unused int pgtable_l5_enabled; diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h index 5532f8515450..093f0f41fd23 100644 --- a/arch/riscv/include/asm/pgtable-64.h +++ b/arch/riscv/include/asm/pgtable-64.h @@ -66,7 +66,6 @@ typedef struct { #define pmd_val(x) ((x).pmd) #define __pmd(x) ((pmd_t) { (x) }) - #define PTRS_PER_PMD (PAGE_SIZE / sizeof(pmd_t)) #define MAX_POSSIBLE_PHYSMEM_BITS 56 @@ -168,12 +167,6 @@ static inline u64 riscv_page_io(void) #define _PAGE_IO riscv_page_io() #define _PAGE_MTMASK riscv_page_mtmask() -/* Set of bits to preserve across pte_modify() */ -#define _PAGE_CHG_MASK (~(unsigned long)(_PAGE_PRESENT | _PAGE_READ | \ - _PAGE_WRITE | _PAGE_EXEC | \ - _PAGE_USER | _PAGE_GLOBAL | \ - _PAGE_MTMASK)) - static inline int pud_present(pud_t pud) { return (pud_val(pud) & _PAGE_PRESENT); diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index acfd48f92010..ba2fb1d475a3 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -207,6 +207,12 @@ extern struct pt_alloc_ops pt_ops __meminitdata; #define _PAGE_IOREMAP ((_PAGE_KERNEL & ~_PAGE_MTMASK) | _PAGE_IO) #define PAGE_KERNEL_IO __pgprot(_PAGE_IOREMAP) +/* Set of bits to preserve across pte_modify() */ +#define _PAGE_CHG_MASK (~(unsigned long)(_PAGE_PRESENT | _PAGE_READ | \ + _PAGE_WRITE | _PAGE_EXEC | \ + _PAGE_USER | _PAGE_GLOBAL | \ + _PAGE_MTMASK)) + extern pgd_t swapper_pg_dir[]; extern pgd_t trampoline_pg_dir[]; extern pgd_t early_pg_dir[]; -- 2.47.2 When the Svnapot or Svpbmt extension is not implemented, the corresponding page table bits are 
reserved, and must be zero. There is no need to show them in the ptdump output. When the Kconfig option for an extension is disabled, we assume it is not implemented. In that case, the kernel may provide a fallback definition for the fields, like how _PAGE_MTMASK is defined on riscv32. Using those fallback definitions in ptdump would produce incorrect results. To avoid this, hide the fields from the ptdump output. Reviewed-by: Alexandre Ghiti Signed-off-by: Samuel Holland --- (no changes since v1) arch/riscv/mm/ptdump.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/arch/riscv/mm/ptdump.c b/arch/riscv/mm/ptdump.c index 34299c2b231f..0dd6ee282953 100644 --- a/arch/riscv/mm/ptdump.c +++ b/arch/riscv/mm/ptdump.c @@ -134,11 +134,13 @@ struct prot_bits { static const struct prot_bits pte_bits[] = { { -#ifdef CONFIG_64BIT +#ifdef CONFIG_RISCV_ISA_SVNAPOT .mask = _PAGE_NAPOT, .set = "N", .clear = ".", }, { +#endif +#ifdef CONFIG_RISCV_ISA_SVPBMT .mask = _PAGE_MTMASK_SVPBMT, .set = "MT(%s)", .clear = " .. ", @@ -214,7 +216,7 @@ static void dump_prot(struct pg_state *st) if (val) { if (pte_bits[i].mask == _PAGE_SOFT) sprintf(s, pte_bits[i].set, val >> 8); -#ifdef CONFIG_64BIT +#ifdef CONFIG_RISCV_ISA_SVPBMT else if (pte_bits[i].mask == _PAGE_MTMASK_SVPBMT) { if (val == _PAGE_NOCACHE_SVPBMT) sprintf(s, pte_bits[i].set, "NC"); -- 2.47.2 Currently, Linux on RISC-V has three ways to specify the cacheability and ordering PMAs of a page: 1) Do nothing; assume the system is entirely cache-coherent and rely on the hardware for any ordering requirements 2) Use the page table bits specified by Svpbmt 3) Use the page table bits specified by XTheadMae To support all three methods, the kernel dynamically determines the definitions of the _PAGE_NOCACHE and _PAGE_IO fields. However, this alone is not sufficient, as XTheadMae uses a nonzero memory type value for normal memory pages. So the kernel uses an additional alternative sequence (ALT_THEAD_PMA) to insert the correct memory type when generating page table entries. Some RISC-V platforms use a fourth method to specify the cacheability of a page of RAM: RAM is mapped to multiple physical address ranges, with each alias having a different set of statically-determined PMAs. Software selects the PMAs for a page by choosing the corresponding PFN from one of the available physical address ranges. Like for XTheadMae, this strategy also requires applying a transformation when writing page table entries. Since these physical memory aliases should be invisible to the rest of the kernel, the opposite transformation must be applied when reading page table entries. However, with this last method of specifying PMAs, there is no inherent way to indicate the cacheability of a page in the pgprot_t value, since the PFN itself determines cacheability. This implementation reuses the PTE bits from Svpbmt, as Svpbmt is the standard RISC-V extension, and thus ought to be the most common way to indicate per-page PMAs. Thus, the Svpbmt variant of _PAGE_NOCACHE and _PAGE_IO is made available even when the CPU does not support the extension. It turns out that with some clever bit manipulation, it is just as efficient to transform all three Svpbmt memory type values to the corresponding XTheadMae values, as it is to check for and insert the one XTheadMae memory type value for normal memory. Thus, we implement XTheadMae as a variant on top of Svpbmt. 
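As a concrete check on that bit manipulation, here is a stand-alone user-space sketch (not kernel code) that reimplements the fixup pseudocode shown later in this patch and verifies that the three Svpbmt memory type encodings land on the corresponding XTheadMae encodings:

#include <stdint.h>
#include <stdio.h>

/* Svpbmt -> XTheadMae fixup on PTE bits [63:60], per the patch pseudocode. */
static uint64_t fixup_mt(uint64_t pte)
{
	uint64_t t0 = pte >> 59;	/* mt << 1; bit 59 is clear for Svpbmt values */

	if (t0 == 0)
		t0 |= 2;
	t0 ^= 0x5;
	return pte ^ (t0 << 60);
}

int main(void)
{
	/* Svpbmt: PMA = 00, NC = 01, IO = 10 in bits [62:61] */
	printf("PMA ok: %d\n", fixup_mt(0)          == (0x7ULL << 60));	/* T-Head 01110 */
	printf("NC  ok: %d\n", fixup_mt(1ULL << 61) == (0x3ULL << 60));	/* T-Head 00110 */
	printf("IO  ok: %d\n", fixup_mt(2ULL << 61) == (0x9ULL << 60));	/* T-Head 10010 */
	return 0;
}

All three comparisons print 1, matching the T-Head encodings quoted in the comment added below.
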
This allows the _PAGE_NOCACHE and _PAGE_IO definitions to be compile-time constants, and centralizes all memory type handling to a single set of alternative macros. However, there is a tradeoff for platforms relying on hardware for all memory type handling: the memory type PTE bits must now be masked off when writing page table entries, whereas previously no transformation was needed. As a side effect, since the inverse transformation is applied when reading back page table entries, this change fixes the reporting of the memory type bits from ptdump on platforms with XTheadMae. Signed-off-by: Samuel Holland --- (no changes since v2) Changes in v2: - Keep Kconfig options for each PBMT variant separate/non-overlapping - Move fixup code sequences to set_pXX() and pXXp_get() - Only define ALT_UNFIX_MT in configurations that need it - Improve inline documentation of ALT_FIXUP_MT/ALT_UNFIX_MT arch/riscv/include/asm/errata_list.h | 45 ------- arch/riscv/include/asm/pgtable-32.h | 3 + arch/riscv/include/asm/pgtable-64.h | 171 ++++++++++++++++++++++----- arch/riscv/include/asm/pgtable.h | 47 ++++---- arch/riscv/mm/pgtable.c | 14 +-- arch/riscv/mm/ptdump.c | 12 +- 6 files changed, 174 insertions(+), 118 deletions(-) diff --git a/arch/riscv/include/asm/errata_list.h b/arch/riscv/include/asm/errata_list.h index 6694b5ccdcf8..fa03021b7074 100644 --- a/arch/riscv/include/asm/errata_list.h +++ b/arch/riscv/include/asm/errata_list.h @@ -53,51 +53,6 @@ asm(ALTERNATIVE( \ : /* no inputs */ \ : "memory") -/* - * _val is marked as "will be overwritten", so need to set it to 0 - * in the default case. - */ -#define ALT_SVPBMT_SHIFT 61 -#define ALT_THEAD_MAE_SHIFT 59 -#define ALT_SVPBMT(_val, prot) \ -asm(ALTERNATIVE_2("li %0, 0\t\nnop", \ - "li %0, %1\t\nslli %0,%0,%3", 0, \ - RISCV_ISA_EXT_SVPBMT, CONFIG_RISCV_ISA_SVPBMT, \ - "li %0, %2\t\nslli %0,%0,%4", THEAD_VENDOR_ID, \ - ERRATA_THEAD_MAE, CONFIG_ERRATA_THEAD_MAE) \ - : "=r"(_val) \ - : "I"(prot##_SVPBMT >> ALT_SVPBMT_SHIFT), \ - "I"(prot##_THEAD >> ALT_THEAD_MAE_SHIFT), \ - "I"(ALT_SVPBMT_SHIFT), \ - "I"(ALT_THEAD_MAE_SHIFT)) - -#ifdef CONFIG_ERRATA_THEAD_MAE -/* - * IO/NOCACHE memory types are handled together with svpbmt, - * so on T-Head chips, check if no other memory type is set, - * and set the non-0 PMA type if applicable. 
- */ -#define ALT_THEAD_PMA(_val) \ -asm volatile(ALTERNATIVE( \ - __nops(7), \ - "li t3, %1\n\t" \ - "slli t3, t3, %3\n\t" \ - "and t3, %0, t3\n\t" \ - "bne t3, zero, 2f\n\t" \ - "li t3, %2\n\t" \ - "slli t3, t3, %3\n\t" \ - "or %0, %0, t3\n\t" \ - "2:", THEAD_VENDOR_ID, \ - ERRATA_THEAD_MAE, CONFIG_ERRATA_THEAD_MAE) \ - : "+r"(_val) \ - : "I"(_PAGE_MTMASK_THEAD >> ALT_THEAD_MAE_SHIFT), \ - "I"(_PAGE_PMA_THEAD >> ALT_THEAD_MAE_SHIFT), \ - "I"(ALT_THEAD_MAE_SHIFT) \ - : "t3") -#else -#define ALT_THEAD_PMA(_val) -#endif - #define ALT_CMO_OP(_op, _start, _size, _cachesize) \ asm volatile(ALTERNATIVE( \ __nops(5), \ diff --git a/arch/riscv/include/asm/pgtable-32.h b/arch/riscv/include/asm/pgtable-32.h index fa6c87015c48..90ef35a7c1a5 100644 --- a/arch/riscv/include/asm/pgtable-32.h +++ b/arch/riscv/include/asm/pgtable-32.h @@ -28,6 +28,9 @@ #define _PAGE_IO 0 #define _PAGE_MTMASK 0 +#define ALT_FIXUP_MT(_val) +#define ALT_UNFIX_MT(_val) + static const __maybe_unused int pgtable_l4_enabled; static const __maybe_unused int pgtable_l5_enabled; diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h index 093f0f41fd23..aad34c754325 100644 --- a/arch/riscv/include/asm/pgtable-64.h +++ b/arch/riscv/include/asm/pgtable-64.h @@ -8,7 +8,7 @@ #include #include -#include +#include extern bool pgtable_l4_enabled; extern bool pgtable_l5_enabled; @@ -111,6 +111,8 @@ enum napot_cont_order { #define HUGE_MAX_HSTATE 2 #endif +#if defined(CONFIG_RISCV_ISA_SVPBMT) || defined(CONFIG_ERRATA_THEAD_MAE) + /* * [62:61] Svpbmt Memory Type definitions: * @@ -119,53 +121,152 @@ enum napot_cont_order { * 10 - IO Non-cacheable, non-idempotent, strongly-ordered I/O memory * 11 - Rsvd Reserved for future standard use */ -#define _PAGE_NOCACHE_SVPBMT (1UL << 61) -#define _PAGE_IO_SVPBMT (1UL << 62) -#define _PAGE_MTMASK_SVPBMT (_PAGE_NOCACHE_SVPBMT | _PAGE_IO_SVPBMT) +#define _PAGE_NOCACHE (1UL << 61) +#define _PAGE_IO (2UL << 61) +#define _PAGE_MTMASK (3UL << 61) /* + * ALT_FIXUP_MT + * + * On systems that do not support any form of page-based memory type + * configuration, this code sequence clears the memory type bits in the PTE. + * + * On systems that support Svpbmt, the memory type bits are left alone. + * + * On systems that support XTheadMae, a Svpbmt memory type is transformed + * into the corresponding XTheadMae memory type. 
+ * * [63:59] T-Head Memory Type definitions: * bit[63] SO - Strong Order * bit[62] C - Cacheable * bit[61] B - Bufferable * bit[60] SH - Shareable * bit[59] Sec - Trustable - * 00110 - NC Weakly-ordered, Non-cacheable, Bufferable, Shareable, Non-trustable * 01110 - PMA Weakly-ordered, Cacheable, Bufferable, Shareable, Non-trustable + * 00110 - NC Weakly-ordered, Non-cacheable, Bufferable, Shareable, Non-trustable * 10010 - IO Strongly-ordered, Non-cacheable, Non-bufferable, Shareable, Non-trustable + * + * Pseudocode operating on bits [63:60]: + * t0 = mt << 1 + * if (t0 == 0) + * t0 |= 2 + * t0 ^= 0x5 + * mt ^= t0 + */ + +#define ALT_FIXUP_MT(_val) \ + asm(ALTERNATIVE_2("addi t0, zero, 0x3\n\t" \ + "slli t0, t0, 61\n\t" \ + "not t0, t0\n\t" \ + "and %0, %0, t0\n\t" \ + "nop\n\t" \ + "nop\n\t" \ + "nop", \ + __nops(7), \ + 0, RISCV_ISA_EXT_SVPBMT, CONFIG_RISCV_ISA_SVPBMT, \ + "srli t0, %0, 59\n\t" \ + "seqz t1, t0\n\t" \ + "slli t1, t1, 1\n\t" \ + "or t0, t0, t1\n\t" \ + "xori t0, t0, 0x5\n\t" \ + "slli t0, t0, 60\n\t" \ + "xor %0, %0, t0", \ + THEAD_VENDOR_ID, ERRATA_THEAD_MAE, CONFIG_ERRATA_THEAD_MAE) \ + : "+r" (_val) :: "t0", "t1") + +#else + +#define _PAGE_NOCACHE 0 +#define _PAGE_IO 0 +#define _PAGE_MTMASK 0 + +#define ALT_FIXUP_MT(_val) + +#endif /* CONFIG_RISCV_ISA_SVPBMT || CONFIG_ERRATA_THEAD_MAE */ + +#if defined(CONFIG_ERRATA_THEAD_MAE) + +/* + * ALT_UNFIX_MT + * + * On systems that support Svpbmt, or do not support any form of page-based + * memory type configuration, the memory type bits are left alone. + * + * On systems that support XTheadMae, the XTheadMae memory type (or zero) is + * transformed back into the corresponding Svpbmt memory type. + * + * Pseudocode operating on bits [63:60]: + * t0 = mt & 0xd + * t0 ^= t0 >> 1 + * mt ^= t0 */ -#define _PAGE_PMA_THEAD ((1UL << 62) | (1UL << 61) | (1UL << 60)) -#define _PAGE_NOCACHE_THEAD ((1UL << 61) | (1UL << 60)) -#define _PAGE_IO_THEAD ((1UL << 63) | (1UL << 60)) -#define _PAGE_MTMASK_THEAD (_PAGE_PMA_THEAD | _PAGE_IO_THEAD | (1UL << 59)) -static inline u64 riscv_page_mtmask(void) +#define ALT_UNFIX_MT(_val) \ + asm(ALTERNATIVE(__nops(6), \ + "srli t0, %0, 60\n\t" \ + "andi t0, t0, 0xd\n\t" \ + "srli t1, t0, 1\n\t" \ + "xor t0, t0, t1\n\t" \ + "slli t0, t0, 60\n\t" \ + "xor %0, %0, t0", \ + THEAD_VENDOR_ID, ERRATA_THEAD_MAE, CONFIG_ERRATA_THEAD_MAE) \ + : "+r" (_val) :: "t0", "t1") + +#define ptep_get ptep_get +static inline pte_t ptep_get(pte_t *ptep) { - u64 val; + pte_t pte = READ_ONCE(*ptep); - ALT_SVPBMT(val, _PAGE_MTMASK); - return val; + ALT_UNFIX_MT(pte); + + return pte; } -static inline u64 riscv_page_nocache(void) +#define pmdp_get pmdp_get +static inline pmd_t pmdp_get(pmd_t *pmdp) { - u64 val; + pmd_t pmd = READ_ONCE(*pmdp); + + ALT_UNFIX_MT(pmd); - ALT_SVPBMT(val, _PAGE_NOCACHE); - return val; + return pmd; } -static inline u64 riscv_page_io(void) +#define pudp_get pudp_get +static inline pud_t pudp_get(pud_t *pudp) { - u64 val; + pud_t pud = READ_ONCE(*pudp); + + ALT_UNFIX_MT(pud); - ALT_SVPBMT(val, _PAGE_IO); - return val; + return pud; } -#define _PAGE_NOCACHE riscv_page_nocache() -#define _PAGE_IO riscv_page_io() -#define _PAGE_MTMASK riscv_page_mtmask() +#define p4dp_get p4dp_get +static inline p4d_t p4dp_get(p4d_t *p4dp) +{ + p4d_t p4d = READ_ONCE(*p4dp); + + ALT_UNFIX_MT(p4d); + + return p4d; +} + +#define pgdp_get pgdp_get +static inline pgd_t pgdp_get(pgd_t *pgdp) +{ + pgd_t pgd = READ_ONCE(*pgdp); + + ALT_UNFIX_MT(pgd); + + return pgd; +} + +#else + +#define ALT_UNFIX_MT(_val) + +#endif /* 
CONFIG_ERRATA_THEAD_MAE */ static inline int pud_present(pud_t pud) { @@ -195,6 +296,7 @@ static inline int pud_user(pud_t pud) static inline void set_pud(pud_t *pudp, pud_t pud) { + ALT_FIXUP_MT(pud); WRITE_ONCE(*pudp, pud); } @@ -245,11 +347,7 @@ static inline bool mm_pud_folded(struct mm_struct *mm) static inline pmd_t pfn_pmd(unsigned long pfn, pgprot_t prot) { - unsigned long prot_val = pgprot_val(prot); - - ALT_THEAD_PMA(prot_val); - - return __pmd((pfn << _PAGE_PFN_SHIFT) | prot_val); + return __pmd((pfn << _PAGE_PFN_SHIFT) | pgprot_val(prot)); } static inline unsigned long _pmd_pfn(pmd_t pmd) @@ -257,6 +355,9 @@ static inline unsigned long _pmd_pfn(pmd_t pmd) return __page_val_to_pfn(pmd_val(pmd)); } +#define pmd_offset_lockless(pudp, pud, address) \ + (pud_pgtable(pud) + pmd_index(address)) + #define pmd_ERROR(e) \ pr_err("%s:%d: bad pmd %016lx.\n", __FILE__, __LINE__, pmd_val(e)) @@ -268,6 +369,7 @@ static inline unsigned long _pmd_pfn(pmd_t pmd) static inline void set_p4d(p4d_t *p4dp, p4d_t p4d) { + ALT_FIXUP_MT(p4d); WRITE_ONCE(*p4dp, p4d); } @@ -327,11 +429,15 @@ static inline struct page *p4d_page(p4d_t p4d) #define pud_index(addr) (((addr) >> PUD_SHIFT) & (PTRS_PER_PUD - 1)) +#define pud_offset_lockless(p4dp, p4d, address) \ + (pgtable_l4_enabled ? p4d_pgtable(p4d) + pud_index(address) : (pud_t *)(p4dp)) + #define pud_offset pud_offset -pud_t *pud_offset(p4d_t *p4d, unsigned long address); +pud_t *pud_offset(p4d_t *p4dp, unsigned long address); static inline void set_pgd(pgd_t *pgdp, pgd_t pgd) { + ALT_FIXUP_MT(pgd); WRITE_ONCE(*pgdp, pgd); } @@ -382,8 +488,11 @@ static inline struct page *pgd_page(pgd_t pgd) #define p4d_index(addr) (((addr) >> P4D_SHIFT) & (PTRS_PER_P4D - 1)) +#define p4d_offset_lockless(pgdp, pgd, address) \ + (pgtable_l5_enabled ? 
pgd_pgtable(pgd) + p4d_index(address) : (p4d_t *)(pgdp)) + #define p4d_offset p4d_offset -p4d_t *p4d_offset(pgd_t *pgd, unsigned long address); +p4d_t *p4d_offset(pgd_t *pgdp, unsigned long address); #ifdef CONFIG_TRANSPARENT_HUGEPAGE static inline pte_t pmd_pte(pmd_t pmd); diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index ba2fb1d475a3..8b622f901707 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -253,6 +253,7 @@ static inline bool pmd_leaf(pmd_t pmd) static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) { + ALT_FIXUP_MT(pmd); WRITE_ONCE(*pmdp, pmd); } @@ -263,11 +264,7 @@ static inline void pmd_clear(pmd_t *pmdp) static inline pgd_t pfn_pgd(unsigned long pfn, pgprot_t prot) { - unsigned long prot_val = pgprot_val(prot); - - ALT_THEAD_PMA(prot_val); - - return __pgd((pfn << _PAGE_PFN_SHIFT) | prot_val); + return __pgd((pfn << _PAGE_PFN_SHIFT) | pgprot_val(prot)); } static inline unsigned long _pgd_pfn(pgd_t pgd) @@ -343,11 +340,7 @@ static inline unsigned long pte_pfn(pte_t pte) /* Constructs a page table entry */ static inline pte_t pfn_pte(unsigned long pfn, pgprot_t prot) { - unsigned long prot_val = pgprot_val(prot); - - ALT_THEAD_PMA(prot_val); - - return __pte((pfn << _PAGE_PFN_SHIFT) | prot_val); + return __pte((pfn << _PAGE_PFN_SHIFT) | pgprot_val(prot)); } #define pte_pgprot pte_pgprot @@ -486,11 +479,7 @@ static inline int pmd_protnone(pmd_t pmd) /* Modify page protection bits */ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { - unsigned long newprot_val = pgprot_val(newprot); - - ALT_THEAD_PMA(newprot_val); - - return __pte((pte_val(pte) & _PAGE_CHG_MASK) | newprot_val); + return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot)); } #define pgd_ERROR(e) \ @@ -547,9 +536,10 @@ static inline int pte_same(pte_t pte_a, pte_t pte_b) * a page table are directly modified. Thus, the following hook is * made available. 
*/ -static inline void set_pte(pte_t *ptep, pte_t pteval) +static inline void set_pte(pte_t *ptep, pte_t pte) { - WRITE_ONCE(*ptep, pteval); + ALT_FIXUP_MT(pte); + WRITE_ONCE(*ptep, pte); } void flush_icache_pte(struct mm_struct *mm, pte_t pte); @@ -598,6 +588,7 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, { pte_t pte = __pte(atomic_long_xchg((atomic_long_t *)ptep, 0)); + ALT_UNFIX_MT(pte); page_table_check_pte_clear(mm, pte); return pte; @@ -869,6 +860,7 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, { pmd_t pmd = __pmd(atomic_long_xchg((atomic_long_t *)pmdp, 0)); + ALT_UNFIX_MT(pmd); page_table_check_pmd_clear(mm, pmd); return pmd; @@ -886,7 +878,11 @@ static inline pmd_t pmdp_establish(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp, pmd_t pmd) { page_table_check_pmd_set(vma->vm_mm, pmdp, pmd); - return __pmd(atomic_long_xchg((atomic_long_t *)pmdp, pmd_val(pmd))); + ALT_FIXUP_MT(pmd); + pmd = __pmd(atomic_long_xchg((atomic_long_t *)pmdp, pmd_val(pmd))); + ALT_UNFIX_MT(pmd); + + return pmd; } #define pmdp_collapse_flush pmdp_collapse_flush @@ -955,14 +951,9 @@ static inline int pudp_test_and_clear_young(struct vm_area_struct *vma, static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm, unsigned long address, pud_t *pudp) { -#ifdef CONFIG_SMP - pud_t pud = __pud(xchg(&pudp->pud, 0)); -#else - pud_t pud = pudp_get(pudp); - - pud_clear(pudp); -#endif + pud_t pud = __pud(atomic_long_xchg((atomic_long_t *)pudp, 0)); + ALT_UNFIX_MT(pud); page_table_check_pud_clear(mm, pud); return pud; @@ -985,7 +976,11 @@ static inline pud_t pudp_establish(struct vm_area_struct *vma, unsigned long address, pud_t *pudp, pud_t pud) { page_table_check_pud_set(vma->vm_mm, pudp, pud); - return __pud(atomic_long_xchg((atomic_long_t *)pudp, pud_val(pud))); + ALT_FIXUP_MT(pud); + pud = __pud(atomic_long_xchg((atomic_long_t *)pudp, pud_val(pud))); + ALT_UNFIX_MT(pud); + + return pud; } static inline pud_t pud_mkinvalid(pud_t pud) diff --git a/arch/riscv/mm/pgtable.c b/arch/riscv/mm/pgtable.c index c4b85a828797..604744d6924f 100644 --- a/arch/riscv/mm/pgtable.c +++ b/arch/riscv/mm/pgtable.c @@ -42,20 +42,14 @@ int ptep_test_and_clear_young(struct vm_area_struct *vma, EXPORT_SYMBOL_GPL(ptep_test_and_clear_young); #ifdef CONFIG_64BIT -pud_t *pud_offset(p4d_t *p4d, unsigned long address) +pud_t *pud_offset(p4d_t *p4dp, unsigned long address) { - if (pgtable_l4_enabled) - return p4d_pgtable(p4dp_get(p4d)) + pud_index(address); - - return (pud_t *)p4d; + return pud_offset_lockless(p4dp, p4dp_get(p4dp), address); } -p4d_t *p4d_offset(pgd_t *pgd, unsigned long address) +p4d_t *p4d_offset(pgd_t *pgdp, unsigned long address) { - if (pgtable_l5_enabled) - return pgd_pgtable(pgdp_get(pgd)) + p4d_index(address); - - return (p4d_t *)pgd; + return p4d_offset_lockless(pgdp, pgdp_get(pgdp), address); } #endif diff --git a/arch/riscv/mm/ptdump.c b/arch/riscv/mm/ptdump.c index 0dd6ee282953..763ffde8ab5e 100644 --- a/arch/riscv/mm/ptdump.c +++ b/arch/riscv/mm/ptdump.c @@ -140,8 +140,8 @@ static const struct prot_bits pte_bits[] = { .clear = ".", }, { #endif -#ifdef CONFIG_RISCV_ISA_SVPBMT - .mask = _PAGE_MTMASK_SVPBMT, +#if defined(CONFIG_RISCV_ISA_SVPBMT) || defined(CONFIG_ERRATA_THEAD_MAE) + .mask = _PAGE_MTMASK, .set = "MT(%s)", .clear = " .. 
", }, { @@ -216,11 +216,11 @@ static void dump_prot(struct pg_state *st) if (val) { if (pte_bits[i].mask == _PAGE_SOFT) sprintf(s, pte_bits[i].set, val >> 8); -#ifdef CONFIG_RISCV_ISA_SVPBMT - else if (pte_bits[i].mask == _PAGE_MTMASK_SVPBMT) { - if (val == _PAGE_NOCACHE_SVPBMT) +#if defined(CONFIG_RISCV_ISA_SVPBMT) || defined(CONFIG_ERRATA_THEAD_MAE) + else if (pte_bits[i].mask == _PAGE_MTMASK) { + if (val == _PAGE_NOCACHE) sprintf(s, pte_bits[i].set, "NC"); - else if (val == _PAGE_IO_SVPBMT) + else if (val == _PAGE_IO) sprintf(s, pte_bits[i].set, "IO"); else sprintf(s, pte_bits[i].set, "??"); -- 2.47.2 pgtable-32.h and pgtable-64.h are not usable by assembly code files, so move all page table field definitions to pgtable-bits.h. This allows handling more complex PTE transformations in out-of-line assembly code. Signed-off-by: Samuel Holland --- (no changes since v1) arch/riscv/include/asm/pgtable-32.h | 11 ------- arch/riscv/include/asm/pgtable-64.h | 30 ------------------- arch/riscv/include/asm/pgtable-bits.h | 42 +++++++++++++++++++++++++-- 3 files changed, 40 insertions(+), 43 deletions(-) diff --git a/arch/riscv/include/asm/pgtable-32.h b/arch/riscv/include/asm/pgtable-32.h index 90ef35a7c1a5..eb556ab95732 100644 --- a/arch/riscv/include/asm/pgtable-32.h +++ b/arch/riscv/include/asm/pgtable-32.h @@ -17,17 +17,6 @@ #define MAX_POSSIBLE_PHYSMEM_BITS 34 -/* - * rv32 PTE format: - * | XLEN-1 10 | 9 8 | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 - * PFN reserved for SW D A G U X W R V - */ -#define _PAGE_PFN_MASK GENMASK(31, 10) - -#define _PAGE_NOCACHE 0 -#define _PAGE_IO 0 -#define _PAGE_MTMASK 0 - #define ALT_FIXUP_MT(_val) #define ALT_UNFIX_MT(_val) diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h index aad34c754325..fa2c1dcb6f72 100644 --- a/arch/riscv/include/asm/pgtable-64.h +++ b/arch/riscv/include/asm/pgtable-64.h @@ -70,20 +70,6 @@ typedef struct { #define MAX_POSSIBLE_PHYSMEM_BITS 56 -/* - * rv64 PTE format: - * | 63 | 62 61 | 60 54 | 53 10 | 9 8 | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 - * N MT RSV PFN reserved for SW D A G U X W R V - */ -#define _PAGE_PFN_MASK GENMASK(53, 10) - -/* - * [63] Svnapot definitions: - * 0 Svnapot disabled - * 1 Svnapot enabled - */ -#define _PAGE_NAPOT_SHIFT 63 -#define _PAGE_NAPOT BIT(_PAGE_NAPOT_SHIFT) /* * Only 64KB (order 4) napot ptes supported. 
*/ @@ -113,18 +99,6 @@ enum napot_cont_order { #if defined(CONFIG_RISCV_ISA_SVPBMT) || defined(CONFIG_ERRATA_THEAD_MAE) -/* - * [62:61] Svpbmt Memory Type definitions: - * - * 00 - PMA Normal Cacheable, No change to implied PMA memory type - * 01 - NC Non-cacheable, idempotent, weakly-ordered Main Memory - * 10 - IO Non-cacheable, non-idempotent, strongly-ordered I/O memory - * 11 - Rsvd Reserved for future standard use - */ -#define _PAGE_NOCACHE (1UL << 61) -#define _PAGE_IO (2UL << 61) -#define _PAGE_MTMASK (3UL << 61) - /* * ALT_FIXUP_MT * @@ -176,10 +150,6 @@ enum napot_cont_order { #else -#define _PAGE_NOCACHE 0 -#define _PAGE_IO 0 -#define _PAGE_MTMASK 0 - #define ALT_FIXUP_MT(_val) #endif /* CONFIG_RISCV_ISA_SVPBMT || CONFIG_ERRATA_THEAD_MAE */ diff --git a/arch/riscv/include/asm/pgtable-bits.h b/arch/riscv/include/asm/pgtable-bits.h index 179bd4afece4..18c50cbd78bf 100644 --- a/arch/riscv/include/asm/pgtable-bits.h +++ b/arch/riscv/include/asm/pgtable-bits.h @@ -6,6 +6,16 @@ #ifndef _ASM_RISCV_PGTABLE_BITS_H #define _ASM_RISCV_PGTABLE_BITS_H +/* + * rv32 PTE format: + * | XLEN-1 10 | 9 8 | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 + * PFN reserved for SW D A G U X W R V + * + * rv64 PTE format: + * | 63 | 62 61 | 60 54 | 53 10 | 9 8 | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 + * N MT RSV PFN reserved for SW D A G U X W R V + */ + #define _PAGE_ACCESSED_OFFSET 6 #define _PAGE_PRESENT (1 << 0) @@ -21,6 +31,36 @@ #define _PAGE_SPECIAL (1 << 8) /* RSW: 0x1 */ #define _PAGE_TABLE _PAGE_PRESENT +#define _PAGE_PFN_SHIFT 10 +#ifdef CONFIG_64BIT +#define _PAGE_PFN_MASK GENMASK(53, 10) +#else +#define _PAGE_PFN_MASK GENMASK(31, 10) +#endif /* CONFIG_64BIT */ + +#if defined(CONFIG_RISCV_ISA_SVPBMT) || defined(CONFIG_ERRATA_THEAD_MAE) +/* + * [62:61] Svpbmt Memory Type definitions: + * + * 00 - PMA Normal Cacheable, No change to implied PMA memory type + * 01 - NC Non-cacheable, idempotent, weakly-ordered Main Memory + * 10 - IO Non-cacheable, non-idempotent, strongly-ordered I/O memory + * 11 - Rsvd Reserved for future standard use + */ +#define _PAGE_NOCACHE (UL(1) << 61) +#define _PAGE_IO (UL(2) << 61) +#define _PAGE_MTMASK (UL(3) << 61) +#else +#define _PAGE_NOCACHE 0 +#define _PAGE_IO 0 +#define _PAGE_MTMASK 0 +#endif /* CONFIG_RISCV_ISA_SVPBMT || CONFIG_ERRATA_THEAD_MAE */ + +#ifdef CONFIG_RISCV_ISA_SVNAPOT +#define _PAGE_NAPOT_SHIFT 63 +#define _PAGE_NAPOT BIT(_PAGE_NAPOT_SHIFT) +#endif /* CONFIG_RISCV_ISA_SVNAPOT */ + /* * _PAGE_PROT_NONE is set on not-present pages (and ignored by the hardware) to * distinguish them from swapped out pages @@ -30,8 +70,6 @@ /* Used for swap PTEs only. */ #define _PAGE_SWP_EXCLUSIVE _PAGE_ACCESSED -#define _PAGE_PFN_SHIFT 10 - /* * when all of R/W/X are zero, the PTE is a pointer to the next level * of the page table; otherwise, it is a leaf PTE. -- 2.47.2 ALT_FIXUP_MT() is already using ALTERNATIVE_2(), but it needs to be extended to handle a fourth case. Add ALTERNATIVE_3(), which extends ALTERNATIVE_2() with another block of new content. 
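As a rough illustration (not part of this patch), a caller passes one default sequence plus three replacement sequences, each with its own vendor ID, patch ID, and Kconfig gate, in the same argument order as ALTERNATIVE_2(). The identifiers below are the ones used by ALT_FIXUP_MT() later in this series; the instruction sequences and the ALT_EXAMPLE() name are placeholders only:

/* Hypothetical example only -- not added by this patch. */
#define ALT_EXAMPLE(_val)						\
	asm(ALTERNATIVE_3(__nops(2),					\
			  /* patched in when Svpbmt is detected */	\
			  "li %0, 1\n\t"				\
			  "nop",					\
			  0, RISCV_ISA_EXT_SVPBMT,			\
			  CONFIG_RISCV_ISA_SVPBMT,			\
			  /* patched in when XLinuxMemalias is used */	\
			  "li %0, 2\n\t"				\
			  "nop",					\
			  0, RISCV_ISA_EXT_XLINUXMEMALIAS,		\
			  CONFIG_RISCV_ISA_XLINUXMEMALIAS,		\
			  /* patched in for the XTheadMae erratum */	\
			  "li %0, 3\n\t"				\
			  "nop",					\
			  THEAD_VENDOR_ID, ERRATA_THEAD_MAE,		\
			  CONFIG_ERRATA_THEAD_MAE)			\
	    : "+r" (_val))

Each replacement block must occupy the same number of bytes as the default sequence, hence the explicit nop padding.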
Reviewed-by: Andrew Jones Signed-off-by: Samuel Holland --- (no changes since v2) Changes in v2: - Fix erroneously-escaped newline in assembly ALTERNATIVE_CFG_3 macro arch/riscv/include/asm/alternative-macros.h | 45 ++++++++++++++++++--- 1 file changed, 40 insertions(+), 5 deletions(-) diff --git a/arch/riscv/include/asm/alternative-macros.h b/arch/riscv/include/asm/alternative-macros.h index 9619bd5c8eba..e8bf384da5c2 100644 --- a/arch/riscv/include/asm/alternative-macros.h +++ b/arch/riscv/include/asm/alternative-macros.h @@ -50,8 +50,17 @@ ALT_NEW_CONTENT \vendor_id_2, \patch_id_2, \enable_2, "\new_c_2" .endm +.macro ALTERNATIVE_CFG_3 old_c, new_c_1, vendor_id_1, patch_id_1, enable_1, \ + new_c_2, vendor_id_2, patch_id_2, enable_2, \ + new_c_3, vendor_id_3, patch_id_3, enable_3 + ALTERNATIVE_CFG_2 "\old_c", "\new_c_1", \vendor_id_1, \patch_id_1, \enable_1 \ + "\new_c_2", \vendor_id_2, \patch_id_2, \enable_2 + ALT_NEW_CONTENT \vendor_id_3, \patch_id_3, \enable_3, "\new_c_3" +.endm + #define __ALTERNATIVE_CFG(...) ALTERNATIVE_CFG __VA_ARGS__ #define __ALTERNATIVE_CFG_2(...) ALTERNATIVE_CFG_2 __VA_ARGS__ +#define __ALTERNATIVE_CFG_3(...) ALTERNATIVE_CFG_3 __VA_ARGS__ #else /* !__ASSEMBLER__ */ @@ -98,6 +107,13 @@ __ALTERNATIVE_CFG(old_c, new_c_1, vendor_id_1, patch_id_1, enable_1) \ ALT_NEW_CONTENT(vendor_id_2, patch_id_2, enable_2, new_c_2) +#define __ALTERNATIVE_CFG_3(old_c, new_c_1, vendor_id_1, patch_id_1, enable_1, \ + new_c_2, vendor_id_2, patch_id_2, enable_2, \ + new_c_3, vendor_id_3, patch_id_3, enable_3) \ + __ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, patch_id_1, enable_1, \ + new_c_2, vendor_id_2, patch_id_2, enable_2) \ + ALT_NEW_CONTENT(vendor_id_3, patch_id_3, enable_3, new_c_3) + #endif /* __ASSEMBLER__ */ #define _ALTERNATIVE_CFG(old_c, new_c, vendor_id, patch_id, CONFIG_k) \ @@ -108,6 +124,13 @@ __ALTERNATIVE_CFG_2(old_c, new_c_1, vendor_id_1, patch_id_1, IS_ENABLED(CONFIG_k_1), \ new_c_2, vendor_id_2, patch_id_2, IS_ENABLED(CONFIG_k_2)) +#define _ALTERNATIVE_CFG_3(old_c, new_c_1, vendor_id_1, patch_id_1, CONFIG_k_1, \ + new_c_2, vendor_id_2, patch_id_2, CONFIG_k_2, \ + new_c_3, vendor_id_3, patch_id_3, CONFIG_k_3) \ + __ALTERNATIVE_CFG_3(old_c, new_c_1, vendor_id_1, patch_id_1, IS_ENABLED(CONFIG_k_1), \ + new_c_2, vendor_id_2, patch_id_2, IS_ENABLED(CONFIG_k_2), \ + new_c_3, vendor_id_3, patch_id_3, IS_ENABLED(CONFIG_k_3)) + #else /* CONFIG_RISCV_ALTERNATIVE */ #ifdef __ASSEMBLER__ @@ -118,11 +141,17 @@ #define __ALTERNATIVE_CFG(old_c, ...) ALTERNATIVE_CFG old_c #define __ALTERNATIVE_CFG_2(old_c, ...) ALTERNATIVE_CFG old_c +#define _ALTERNATIVE_CFG_3(old_c, ...) \ + ALTERNATIVE_CFG old_c + #else /* !__ASSEMBLER__ */ #define __ALTERNATIVE_CFG(old_c, ...) old_c "\n" #define __ALTERNATIVE_CFG_2(old_c, ...) old_c "\n" +#define _ALTERNATIVE_CFG_3(old_c, ...) \ + __ALTERNATIVE_CFG(old_c) + #endif /* __ASSEMBLER__ */ #define _ALTERNATIVE_CFG(old_c, ...) __ALTERNATIVE_CFG(old_c) @@ -147,15 +176,21 @@ _ALTERNATIVE_CFG(old_content, new_content, vendor_id, patch_id, CONFIG_k) /* - * A vendor wants to replace an old_content, but another vendor has used - * ALTERNATIVE() to patch its customized content at the same location. In - * this case, this vendor can create a new macro ALTERNATIVE_2() based - * on the following sample code and then replace ALTERNATIVE() with - * ALTERNATIVE_2() to append its customized content. + * Variant of ALTERNATIVE() that supports two sets of replacement content. 
*/ #define ALTERNATIVE_2(old_content, new_content_1, vendor_id_1, patch_id_1, CONFIG_k_1, \ new_content_2, vendor_id_2, patch_id_2, CONFIG_k_2) \ _ALTERNATIVE_CFG_2(old_content, new_content_1, vendor_id_1, patch_id_1, CONFIG_k_1, \ new_content_2, vendor_id_2, patch_id_2, CONFIG_k_2) +/* + * Variant of ALTERNATIVE() that supports three sets of replacement content. + */ +#define ALTERNATIVE_3(old_content, new_content_1, vendor_id_1, patch_id_1, CONFIG_k_1, \ + new_content_2, vendor_id_2, patch_id_2, CONFIG_k_2, \ + new_content_3, vendor_id_3, patch_id_3, CONFIG_k_3) \ + _ALTERNATIVE_CFG_3(old_content, new_content_1, vendor_id_1, patch_id_1, CONFIG_k_1, \ + new_content_2, vendor_id_2, patch_id_2, CONFIG_k_2, \ + new_content_3, vendor_id_3, patch_id_3, CONFIG_k_3) + #endif -- 2.47.2 Alternative assembly code may wish to use an alternate link register to minimize the number of clobbered registers. Apply the offset fix to all jalr (not jr) instructions, i.e. where rd is not x0. Signed-off-by: Samuel Holland --- (no changes since v1) arch/riscv/kernel/alternative.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/kernel/alternative.c b/arch/riscv/kernel/alternative.c index 7642704c7f18..e3eb2585faea 100644 --- a/arch/riscv/kernel/alternative.c +++ b/arch/riscv/kernel/alternative.c @@ -126,8 +126,8 @@ void riscv_alternative_fix_offsets(void *alt_ptr, unsigned int len, if (!riscv_insn_is_jalr(insn2)) continue; - /* if instruction pair is a call, it will use the ra register */ - if (RV_EXTRACT_RD_REG(insn) != 1) + /* if instruction pair is a call, it will save a link register */ + if (RV_EXTRACT_RD_REG(insn) == 0) continue; riscv_alternative_fix_auipc_jalr(alt_ptr + i * sizeof(u32), -- 2.47.2 DMA_DIRECT_REMAP allows the kernel to make pages coherent for DMA by remapping them in the page tables with a different pgprot_t value. On RISC-V, this is supported by the page-based memory type extensions (Svpbmt and Xtheadmae). It is independent from the software cache maintenance extensions (Zicbom and Xtheadcmo). Signed-off-by: Samuel Holland --- Changes in v3: - New patch for v3 arch/riscv/Kconfig | 2 +- arch/riscv/Kconfig.errata | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index fadec20b87a8..cf5a4b5cdcd4 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -598,6 +598,7 @@ config RISCV_ISA_SVPBMT depends on 64BIT && MMU depends on RISCV_ALTERNATIVE default y + select DMA_DIRECT_REMAP help Add support for the Svpbmt ISA-extension (Supervisor-mode: page-based memory types) in the kernel when it is detected at boot. 
@@ -811,7 +812,6 @@ config RISCV_ISA_ZICBOM depends on RISCV_ALTERNATIVE default y select RISCV_DMA_NONCOHERENT - select DMA_DIRECT_REMAP help Add support for the Zicbom extension (Cache Block Management Operations) and enable its use in the kernel when it is detected diff --git a/arch/riscv/Kconfig.errata b/arch/riscv/Kconfig.errata index aca9b0cfcfec..46a353a266e5 100644 --- a/arch/riscv/Kconfig.errata +++ b/arch/riscv/Kconfig.errata @@ -108,6 +108,7 @@ config ERRATA_THEAD config ERRATA_THEAD_MAE bool "Apply T-Head's memory attribute extension (XTheadMae) errata" depends on ERRATA_THEAD && 64BIT && MMU + select DMA_DIRECT_REMAP select RISCV_ALTERNATIVE_EARLY default y help @@ -119,7 +120,6 @@ config ERRATA_THEAD_MAE config ERRATA_THEAD_CMO bool "Apply T-Head cache management errata" depends on ERRATA_THEAD && MMU - select DMA_DIRECT_REMAP select RISCV_DMA_NONCOHERENT select RISCV_NONSTANDARD_CACHE_OPS default y -- 2.47.2 Information about physical memory regions is needed by both the kernel and M-mode firmware. For example, the kernel needs to know about noncacheable aliases of cacheable memory in order to allocate coherent memory pages for DMA. M-mode firmware needs to know about those aliases so it can protect itself from lower-privileged software. The RISC-V Privileged Architecture delegates the description of Physical Memory Attributes (PMAs) to the platform. On DT-based platforms, it makes sense to put this information in the devicetree. Signed-off-by: Samuel Holland --- Changes in v3: - Split PMR_IS_ALIAS flag from PMR_ALIAS_MASK number - Add "model" property to DT binding example to fix validation Changes in v2: - Remove references to Physical Address Width (no longer part of Smmpt) - Remove special first entry from the list of physical memory regions - Fix compatible string in DT binding example .../bindings/riscv/physical-memory.yaml | 92 +++++++++++++++++++ include/dt-bindings/riscv/physical-memory.h | 45 +++++++++ 2 files changed, 137 insertions(+) create mode 100644 Documentation/devicetree/bindings/riscv/physical-memory.yaml create mode 100644 include/dt-bindings/riscv/physical-memory.h diff --git a/Documentation/devicetree/bindings/riscv/physical-memory.yaml b/Documentation/devicetree/bindings/riscv/physical-memory.yaml new file mode 100644 index 000000000000..8beaa588c71c --- /dev/null +++ b/Documentation/devicetree/bindings/riscv/physical-memory.yaml @@ -0,0 +1,92 @@ +# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +%YAML 1.2 +--- +$id: http://devicetree.org/schemas/riscv/physical-memory.yaml# +$schema: http://devicetree.org/meta-schemas/core.yaml# + +title: RISC-V Physical Memory Regions + +maintainers: + - Samuel Holland + +description: + The RISC-V Privileged Architecture defines a number of Physical Memory + Attributes (PMAs) which apply to a given region of memory. These include the + types of accesses (read, write, execute, LR/SC, and/or AMO) allowed within + a region, the supported access widths and alignments, the cacheability and + coherence of the region, and whether or not accesses to the region may have + side effects. + + Some RISC-V platforms provide multiple physical address mappings for main + memory or certain peripherals. Each alias of a region generally has different + PMAs (e.g. cacheable vs non-cacheable), which allows software to dynamically + select the PMAs for an access by referencing the corresponding alias. + + On DT-based RISC-V platforms, this information is provided by the + riscv,physical-memory-regions property of the root node. 
+ +properties: + $nodename: + const: '/' + + riscv,physical-memory-regions: + $ref: /schemas/types.yaml#/definitions/uint32-matrix + description: + Each table entry provides PMAs for a specific physical memory region, + which must not overlap with any other table entry. + minItems: 1 + maxItems: 256 + items: + minItems: 4 + maxItems: 6 + additionalItems: true + items: + - description: CPU physical address (#address-cells) + - description: > + Size (#size-cells). For entry 0, if the size is zero, the size is + assumed to be 2^(32 * #size-cells). + - description: > + Flags describing the most restrictive PMAs for any address within + the region. + + The least significant byte indicates the types of accesses allowed + for this region. Note that a memory region may support a type of + access (e.g. AMOs) even if the CPU does not. + + The next byte describes the cacheability, coherence, idempotency, + and ordering PMAs for this region. It also includes a flag to + indicate that accesses to a region are unsafe and must be + prohibited by software (for example using PMPs or Smmpt). + + The third byte is reserved for future PMAs. + + The most significant byte is the index of the lowest-numbered entry + which this entry is an alias of, if any. Aliases need not be the + same size, for example if a smaller memory region repeats within a + larger alias. + - description: Reserved for describing future PMAs + +additionalProperties: true + +examples: + - | + #include + + / { + compatible = "beagle,beaglev-starlight-jh7100-r0", "starfive,jh7100"; + model = "BeagleV Starlight Beta"; + #address-cells = <2>; + #size-cells = <2>; + + riscv,physical-memory-regions = + <0x00 0x18000000 0x00 0x00020000 (PMA_RWX | PMA_NONCACHEABLE_MEMORY) 0x0>, + <0x00 0x18080000 0x00 0x00020000 (PMA_RWX | PMA_NONCACHEABLE_MEMORY) 0x0>, + <0x00 0x41000000 0x00 0x1f000000 (PMA_RWX | PMA_NONCACHEABLE_MEMORY) 0x0>, + <0x00 0x61000000 0x00 0x1f000000 (PMA_RWXA | PMA_NONCOHERENT_MEMORY | PMR_ALIAS(3)) 0x0>, + <0x00 0x80000000 0x08 0x00000000 (PMA_RWXA | PMA_NONCOHERENT_MEMORY) 0x0>, + <0x10 0x00000000 0x08 0x00000000 (PMA_RWX | PMA_NONCACHEABLE_MEMORY | PMR_ALIAS(5)) 0x0>, + <0x20 0x00000000 0x10 0x00000000 (PMA_RWX | PMA_NONCACHEABLE_MEMORY) 0x0>, + <0x30 0x00000000 0x10 0x00000000 (PMA_RWXA | PMA_NONCOHERENT_MEMORY | PMR_ALIAS(7)) 0x0>; + }; + +... 
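For reference, the third cell of one row in the example above decomposes as follows, using the macro values from the header added below; this is a worked illustration only, not additional binding text:

    /* <0x00 0x80000000 0x08 0x00000000 (PMA_RWXA | PMA_NONCOHERENT_MEMORY) 0x0> */
    PMA_RWXA               = PMA_READ | PMA_WRITE | PMA_EXECUTE
                           | PMA_AMO_ARITHMETIC | PMA_RSRV_EVENTUAL  = 0x000000b7
    PMA_NONCOHERENT_MEMORY = PMA_ORDER_MEMORY | PMA_READ_IDEMPOTENT
                           | PMA_WRITE_IDEMPOTENT | PMA_CACHEABLE    = 0x00001e00
    flags                  = 0x00001eb7

    /* A noncached alias of table entry n additionally sets: */
    PMR_ALIAS(n)           = PMR_IS_ALIAS | (n << 24)
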
diff --git a/include/dt-bindings/riscv/physical-memory.h b/include/dt-bindings/riscv/physical-memory.h new file mode 100644 index 000000000000..d6ed8015c535 --- /dev/null +++ b/include/dt-bindings/riscv/physical-memory.h @@ -0,0 +1,45 @@ +/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */ + +#ifndef _DT_BINDINGS_RISCV_PHYSICAL_MEMORY_H +#define _DT_BINDINGS_RISCV_PHYSICAL_MEMORY_H + +#define PMA_READ (1 << 0) +#define PMA_WRITE (1 << 1) +#define PMA_EXECUTE (1 << 2) +#define PMA_AMO_MASK (3 << 4) +#define PMA_AMO_NONE (0 << 4) +#define PMA_AMO_SWAP (1 << 4) +#define PMA_AMO_LOGICAL (2 << 4) +#define PMA_AMO_ARITHMETIC (3 << 4) +#define PMA_RSRV_MASK (3 << 6) +#define PMA_RSRV_NONE (0 << 6) +#define PMA_RSRV_NON_EVENTUAL (1 << 6) +#define PMA_RSRV_EVENTUAL (2 << 6) + +#define PMA_RW (PMA_READ | PMA_WRITE) +#define PMA_RWA (PMA_RW | PMA_AMO_ARITHMETIC | PMA_RSRV_EVENTUAL) +#define PMA_RWX (PMA_RW | PMA_EXECUTE) +#define PMA_RWXA (PMA_RWA | PMA_EXECUTE) + +#define PMA_ORDER_MASK (3 << 8) +#define PMA_ORDER_IO_RELAXED (0 << 8) +#define PMA_ORDER_IO_STRONG (1 << 8) +#define PMA_ORDER_MEMORY (2 << 8) +#define PMA_READ_IDEMPOTENT (1 << 10) +#define PMA_WRITE_IDEMPOTENT (1 << 11) +#define PMA_CACHEABLE (1 << 12) +#define PMA_COHERENT (1 << 13) + +#define PMA_UNSAFE (1 << 15) + +#define PMA_IO (PMA_ORDER_IO_RELAXED) +#define PMA_NONCACHEABLE_MEMORY (PMA_ORDER_MEMORY | PMA_READ_IDEMPOTENT | \ + PMA_WRITE_IDEMPOTENT) +#define PMA_NONCOHERENT_MEMORY (PMA_NONCACHEABLE_MEMORY | PMA_CACHEABLE) +#define PMA_NORMAL_MEMORY (PMA_NONCOHERENT_MEMORY | PMA_COHERENT) + +#define PMR_ALIAS_MASK (0x7f << 24) +#define PMR_IS_ALIAS (0x80 << 24) +#define PMR_ALIAS(n) (PMR_IS_ALIAS | ((n) << 24)) + +#endif /* _DT_BINDINGS_RISCV_PHYSICAL_MEMORY_H */ -- 2.47.2 On some RISC-V platforms, RAM is mapped simultaneously to multiple physical address ranges, with each alias having a different set of statically-determined Physical Memory Attributes (PMAs). Software alters the PMAs for a particular page at runtime by selecting a PFN from among the aliases of that page's physical address. Implement this by transforming the PFN when writing page tables. If the memory type field is nonzero, replace the PFN with the corresponding PFN from the noncached alias. Similarly, when reading from the page tables, if the PFN is found in a noncached alias, replace it with the PFN from the normal memory alias, and insert _PAGE_NOCACHE. The rest of the kernel sees only PFNs from normal memory and _PAGE_MTMASK values as if Svpbmt was implemented. Memory alias pairs are determined from the devicetree. A Linux custom ISA extension is added to trigger the alternative patching, as alternatives must be linked to an extension or a vendor erratum, and this behavior is not associated with any particular processor vendor. Signed-off-by: Samuel Holland --- Changes in v3: - Fix the logic to allow an alias to be paired with region entry 0 - Select DMA_DIRECT_REMAP Changes in v2: - Put new code behind a new Kconfig option RISCV_ISA_XLINUXMEMALIAS - Document the calling convention of riscv_fixup/unfix_memory_alias() - Do not transform !pte_present() (e.g. 
swap) PTEs - Export riscv_fixup/unfix_memory_alias() to fix module compilation arch/riscv/Kconfig | 17 ++++ arch/riscv/include/asm/hwcap.h | 1 + arch/riscv/include/asm/pgtable-64.h | 44 +++++++-- arch/riscv/include/asm/pgtable-bits.h | 5 +- arch/riscv/include/asm/pgtable.h | 8 ++ arch/riscv/kernel/cpufeature.c | 6 ++ arch/riscv/kernel/setup.c | 1 + arch/riscv/mm/Makefile | 1 + arch/riscv/mm/memory-alias.S | 123 ++++++++++++++++++++++++++ arch/riscv/mm/pgtable.c | 91 +++++++++++++++++++ arch/riscv/mm/ptdump.c | 6 +- 11 files changed, 291 insertions(+), 12 deletions(-) create mode 100644 arch/riscv/mm/memory-alias.S diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index cf5a4b5cdcd4..21efa0d9bdd4 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -877,6 +877,23 @@ config TOOLCHAIN_NEEDS_OLD_ISA_SPEC versions of clang and GCC to be passed to GAS, which has the same result as passing zicsr and zifencei to -march. +config RISCV_ISA_XLINUXMEMALIAS + bool "Use physical memory aliases to emulate page-based memory types" + depends on 64BIT && MMU + depends on RISCV_ALTERNATIVE + default y + select DMA_DIRECT_REMAP + help + Add support for the kernel to alter the Physical Memory Attributes + (PMAs) of a page at runtime by selecting from among the aliases of + that page in the physical address space. + + On systems where physical memory aliases are present, this option + is required in order to mark pages as non-cacheable for use with + non-coherent DMA devices. + + If you don't know what to do here, say Y. + config FPU bool "FPU support" default y diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h index affd63e11b0a..6c6349fe15a7 100644 --- a/arch/riscv/include/asm/hwcap.h +++ b/arch/riscv/include/asm/hwcap.h @@ -107,6 +107,7 @@ #define RISCV_ISA_EXT_ZALRSC 98 #define RISCV_ISA_EXT_ZICBOP 99 +#define RISCV_ISA_EXT_XLINUXMEMALIAS 126 #define RISCV_ISA_EXT_XLINUXENVCFG 127 #define RISCV_ISA_EXT_MAX 128 diff --git a/arch/riscv/include/asm/pgtable-64.h b/arch/riscv/include/asm/pgtable-64.h index fa2c1dcb6f72..f1ecd022e3ee 100644 --- a/arch/riscv/include/asm/pgtable-64.h +++ b/arch/riscv/include/asm/pgtable-64.h @@ -97,7 +97,8 @@ enum napot_cont_order { #define HUGE_MAX_HSTATE 2 #endif -#if defined(CONFIG_RISCV_ISA_SVPBMT) || defined(CONFIG_ERRATA_THEAD_MAE) +#if defined(CONFIG_RISCV_ISA_SVPBMT) || defined(CONFIG_RISCV_ISA_XLINUXMEMALIAS) || \ + defined(CONFIG_ERRATA_THEAD_MAE) /* * ALT_FIXUP_MT @@ -107,6 +108,9 @@ enum napot_cont_order { * * On systems that support Svpbmt, the memory type bits are left alone. * + * On systems that support XLinuxMemalias, PTEs with a nonzero memory type have + * the memory type bits cleared and the PFN replaced with the matching alias. + * * On systems that support XTheadMae, a Svpbmt memory type is transformed * into the corresponding XTheadMae memory type. 
* @@ -129,22 +133,35 @@ enum napot_cont_order { */ #define ALT_FIXUP_MT(_val) \ - asm(ALTERNATIVE_2("addi t0, zero, 0x3\n\t" \ + asm(ALTERNATIVE_3("addi t0, zero, 0x3\n\t" \ "slli t0, t0, 61\n\t" \ "not t0, t0\n\t" \ "and %0, %0, t0\n\t" \ "nop\n\t" \ "nop\n\t" \ + "nop\n\t" \ "nop", \ - __nops(7), \ + __nops(8), \ 0, RISCV_ISA_EXT_SVPBMT, CONFIG_RISCV_ISA_SVPBMT, \ + "addi t0, zero, 0x3\n\t" \ + "slli t0, t0, 61\n\t" \ + "and t0, %0, t0\n\t" \ + "beqz t0, 2f\n\t" \ + "xor t1, %0, t0\n\t" \ + "1: auipc t0, %%pcrel_hi(riscv_fixup_memory_alias)\n\t" \ + "jalr t0, t0, %%pcrel_lo(1b)\n\t" \ + "mv %0, t1\n" \ + "2:", \ + 0, RISCV_ISA_EXT_XLINUXMEMALIAS, \ + CONFIG_RISCV_ISA_XLINUXMEMALIAS, \ "srli t0, %0, 59\n\t" \ "seqz t1, t0\n\t" \ "slli t1, t1, 1\n\t" \ "or t0, t0, t1\n\t" \ "xori t0, t0, 0x5\n\t" \ "slli t0, t0, 60\n\t" \ - "xor %0, %0, t0", \ + "xor %0, %0, t0\n\t" \ + "nop", \ THEAD_VENDOR_ID, ERRATA_THEAD_MAE, CONFIG_ERRATA_THEAD_MAE) \ : "+r" (_val) :: "t0", "t1") @@ -152,9 +169,9 @@ enum napot_cont_order { #define ALT_FIXUP_MT(_val) -#endif /* CONFIG_RISCV_ISA_SVPBMT || CONFIG_ERRATA_THEAD_MAE */ +#endif /* CONFIG_RISCV_ISA_SVPBMT || CONFIG_RISCV_ISA_XLINUXMEMALIAS || CONFIG_ERRATA_THEAD_MAE */ -#if defined(CONFIG_ERRATA_THEAD_MAE) +#if defined(CONFIG_RISCV_ISA_XLINUXMEMALIAS) || defined(CONFIG_ERRATA_THEAD_MAE) /* * ALT_UNFIX_MT @@ -162,6 +179,9 @@ enum napot_cont_order { * On systems that support Svpbmt, or do not support any form of page-based * memory type configuration, the memory type bits are left alone. * + * On systems that support XLinuxMemalias, PTEs with an aliased PFN have the + * matching memory type set and the PFN replaced with the normal memory alias. + * * On systems that support XTheadMae, the XTheadMae memory type (or zero) is * transformed back into the corresponding Svpbmt memory type. 
* @@ -172,7 +192,15 @@ enum napot_cont_order { */ #define ALT_UNFIX_MT(_val) \ - asm(ALTERNATIVE(__nops(6), \ + asm(ALTERNATIVE_2(__nops(6), \ + "mv t1, %0\n\t" \ + "1: auipc t0, %%pcrel_hi(riscv_unfix_memory_alias)\n\t" \ + "jalr t0, t0, %%pcrel_lo(1b)\n\t" \ + "mv %0, t1\n\t" \ + "nop\n\t" \ + "nop", \ + 0, RISCV_ISA_EXT_XLINUXMEMALIAS, \ + CONFIG_RISCV_ISA_XLINUXMEMALIAS, \ "srli t0, %0, 60\n\t" \ "andi t0, t0, 0xd\n\t" \ "srli t1, t0, 1\n\t" \ @@ -236,7 +264,7 @@ static inline pgd_t pgdp_get(pgd_t *pgdp) #define ALT_UNFIX_MT(_val) -#endif /* CONFIG_ERRATA_THEAD_MAE */ +#endif /* CONFIG_RISCV_ISA_XLINUXMEMALIAS || CONFIG_ERRATA_THEAD_MAE */ static inline int pud_present(pud_t pud) { diff --git a/arch/riscv/include/asm/pgtable-bits.h b/arch/riscv/include/asm/pgtable-bits.h index 18c50cbd78bf..4586917b2d98 100644 --- a/arch/riscv/include/asm/pgtable-bits.h +++ b/arch/riscv/include/asm/pgtable-bits.h @@ -38,7 +38,8 @@ #define _PAGE_PFN_MASK GENMASK(31, 10) #endif /* CONFIG_64BIT */ -#if defined(CONFIG_RISCV_ISA_SVPBMT) || defined(CONFIG_ERRATA_THEAD_MAE) +#if defined(CONFIG_RISCV_ISA_SVPBMT) || defined(CONFIG_RISCV_ISA_XLINUXMEMALIAS) || \ + defined(CONFIG_ERRATA_THEAD_MAE) /* * [62:61] Svpbmt Memory Type definitions: * @@ -54,7 +55,7 @@ #define _PAGE_NOCACHE 0 #define _PAGE_IO 0 #define _PAGE_MTMASK 0 -#endif /* CONFIG_RISCV_ISA_SVPBMT || CONFIG_ERRATA_THEAD_MAE */ +#endif /* CONFIG_RISCV_ISA_SVPBMT || CONFIG_RISCV_ISA_XLINUXMEMALIAS || CONFIG_ERRATA_THEAD_MAE */ #ifdef CONFIG_RISCV_ISA_SVNAPOT #define _PAGE_NAPOT_SHIFT 63 diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 8b622f901707..27e8c20af0e2 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -1113,6 +1113,14 @@ extern u64 satp_mode; void paging_init(void); void misc_mem_init(void); +#ifdef CONFIG_RISCV_ISA_XLINUXMEMALIAS +bool __init riscv_have_memory_alias(void); +void __init riscv_init_memory_alias(void); +#else +static inline bool riscv_have_memory_alias(void) { return false; } +static inline void riscv_init_memory_alias(void) {} +#endif /* CONFIG_RISCV_ISA_XLINUXMEMALIAS */ + /* * ZERO_PAGE is a global shared page that is always zero, * used for zero-mapped memory areas, etc. diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 72ca768f4e91..ee59b160e886 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -1093,6 +1093,12 @@ void __init riscv_fill_hwcap(void) riscv_v_setup_vsize(); } + /* Vendor-independent alternatives require a bit in the ISA bitmap. 
*/ + if (riscv_have_memory_alias()) { + set_bit(RISCV_ISA_EXT_XLINUXMEMALIAS, riscv_isa); + pr_info("Using physical memory alias for noncached mappings\n"); + } + memset(print_str, 0, sizeof(print_str)); for (i = 0, j = 0; i < NUM_ALPHA_EXTS; i++) if (riscv_isa[0] & BIT_MASK(i)) diff --git a/arch/riscv/kernel/setup.c b/arch/riscv/kernel/setup.c index b5bc5fc65cea..a6f821150101 100644 --- a/arch/riscv/kernel/setup.c +++ b/arch/riscv/kernel/setup.c @@ -357,6 +357,7 @@ void __init setup_arch(char **cmdline_p) } riscv_init_cbo_blocksizes(); + riscv_init_memory_alias(); riscv_fill_hwcap(); apply_boot_alternatives(); init_rt_signal_env(); diff --git a/arch/riscv/mm/Makefile b/arch/riscv/mm/Makefile index b916a68d324a..b4d757226efb 100644 --- a/arch/riscv/mm/Makefile +++ b/arch/riscv/mm/Makefile @@ -33,3 +33,4 @@ endif obj-$(CONFIG_DEBUG_VIRTUAL) += physaddr.o obj-$(CONFIG_RISCV_DMA_NONCOHERENT) += dma-noncoherent.o obj-$(CONFIG_RISCV_NONSTANDARD_CACHE_OPS) += cache-ops.o +obj-$(CONFIG_RISCV_ISA_XLINUXMEMALIAS) += memory-alias.o diff --git a/arch/riscv/mm/memory-alias.S b/arch/riscv/mm/memory-alias.S new file mode 100644 index 000000000000..e37b83d11591 --- /dev/null +++ b/arch/riscv/mm/memory-alias.S @@ -0,0 +1,123 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2024 SiFive + */ + +#include +#include +#include +#include + +#define CACHED_BASE_OFFSET (0 * RISCV_SZPTR) +#define NONCACHED_BASE_OFFSET (1 * RISCV_SZPTR) +#define SIZE_OFFSET (2 * RISCV_SZPTR) + +#define SIZEOF_PAIR (4 * RISCV_SZPTR) + +/* + * Called from ALT_FIXUP_MT with a non-standard calling convention: + * t0 => return address + * t1 => page table entry + * all other registers are callee-saved + */ +SYM_CODE_START(riscv_fixup_memory_alias) + addi sp, sp, -4 * SZREG + REG_S t2, (0 * SZREG)(sp) + REG_S t3, (1 * SZREG)(sp) + REG_S t4, (2 * SZREG)(sp) +#ifdef CONFIG_RISCV_ISA_SVNAPOT + REG_S t5, (3 * SZREG)(sp) + + /* Save and mask off _PAGE_NAPOT if present. */ + li t5, _PAGE_NAPOT + and t5, t1, t5 + xor t1, t1, t5 +#endif + + /* Ignore !pte_present() PTEs, including swap PTEs. */ + andi t2, t1, (_PAGE_PRESENT | _PAGE_PROT_NONE) + beqz t2, .Lfixup_end + + lla t2, memory_alias_pairs +.Lfixup_loop: + REG_L t3, SIZE_OFFSET(t2) + beqz t3, .Lfixup_end + REG_L t4, CACHED_BASE_OFFSET(t2) + sub t4, t1, t4 + bltu t4, t3, .Lfixup_found + addi t2, t2, SIZEOF_PAIR + j .Lfixup_loop + +.Lfixup_found: + REG_L t3, NONCACHED_BASE_OFFSET(t2) + add t1, t3, t4 + +.Lfixup_end: +#ifdef CONFIG_RISCV_ISA_SVNAPOT + xor t1, t1, t5 + + REG_L t5, (3 * SZREG)(sp) +#endif + REG_L t4, (2 * SZREG)(sp) + REG_L t3, (1 * SZREG)(sp) + REG_L t2, (0 * SZREG)(sp) + addi sp, sp, 4 * SZREG + jr t0 +SYM_CODE_END(riscv_fixup_memory_alias) +EXPORT_SYMBOL(riscv_fixup_memory_alias) + +/* + * Called from ALT_UNFIX_MT with a non-standard calling convention: + * t0 => return address + * t1 => page table entry + * all other registers are callee-saved + */ +SYM_CODE_START(riscv_unfix_memory_alias) + addi sp, sp, -4 * SZREG + REG_S t2, (0 * SZREG)(sp) + REG_S t3, (1 * SZREG)(sp) + REG_S t4, (2 * SZREG)(sp) +#ifdef CONFIG_RISCV_ISA_SVNAPOT + REG_S t5, (3 * SZREG)(sp) + + /* Save and mask off _PAGE_NAPOT if present. */ + li t5, _PAGE_NAPOT + and t5, t1, t5 + xor t1, t1, t5 +#endif + + /* Ignore !pte_present() PTEs, including swap PTEs. 
*/ + andi t2, t1, (_PAGE_PRESENT | _PAGE_PROT_NONE) + beqz t2, .Lunfix_end + + lla t2, memory_alias_pairs +.Lunfix_loop: + REG_L t3, SIZE_OFFSET(t2) + beqz t3, .Lunfix_end + REG_L t4, NONCACHED_BASE_OFFSET(t2) + sub t4, t1, t4 + bltu t4, t3, .Lunfix_found + addi t2, t2, SIZEOF_PAIR + j .Lunfix_loop + +.Lunfix_found: + REG_L t3, CACHED_BASE_OFFSET(t2) + add t1, t3, t4 + + /* PFN was in the noncached alias, so mark it as such. */ + li t2, _PAGE_NOCACHE + or t1, t1, t2 + +.Lunfix_end: +#ifdef CONFIG_RISCV_ISA_SVNAPOT + xor t1, t1, t5 + + REG_L t5, (3 * SZREG)(sp) +#endif + REG_L t4, (2 * SZREG)(sp) + REG_L t3, (1 * SZREG)(sp) + REG_L t2, (0 * SZREG)(sp) + addi sp, sp, 4 * SZREG + jr t0 +SYM_CODE_END(riscv_unfix_memory_alias) +EXPORT_SYMBOL(riscv_unfix_memory_alias) diff --git a/arch/riscv/mm/pgtable.c b/arch/riscv/mm/pgtable.c index 604744d6924f..45f6a0ac22fa 100644 --- a/arch/riscv/mm/pgtable.c +++ b/arch/riscv/mm/pgtable.c @@ -1,8 +1,12 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include +#include #include #include +#include +#include #include int ptep_set_access_flags(struct vm_area_struct *vma, @@ -160,3 +164,90 @@ pud_t pudp_invalidate(struct vm_area_struct *vma, unsigned long address, return old; } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + +#ifdef CONFIG_RISCV_ISA_XLINUXMEMALIAS +struct memory_alias_pair { + unsigned long cached_base; + unsigned long noncached_base; + unsigned long size; + int index; +} memory_alias_pairs[5]; + +bool __init riscv_have_memory_alias(void) +{ + return memory_alias_pairs[0].size; +} + +void __init riscv_init_memory_alias(void) +{ + int na = of_n_addr_cells(of_root); + int ns = of_n_size_cells(of_root); + int nc = na + ns + 2; + const __be32 *prop; + int pairs = 0; + int len; + + prop = of_get_property(of_root, "riscv,physical-memory-regions", &len); + if (!prop) + return; + + len /= sizeof(__be32); + for (int i = 0; len >= nc; i++, prop += nc, len -= nc) { + unsigned long base = of_read_ulong(prop, na); + unsigned long size = of_read_ulong(prop + na, ns); + unsigned long flags = be32_to_cpup(prop + na + ns); + struct memory_alias_pair *pair; + + /* We only care about non-coherent memory. */ + if ((flags & PMA_ORDER_MASK) != PMA_ORDER_MEMORY || (flags & PMA_COHERENT)) + continue; + + /* The cacheable alias must be usable memory. */ + if ((flags & PMA_CACHEABLE) && + !memblock_overlaps_region(&memblock.memory, base, size)) + continue; + + if (flags & PMR_IS_ALIAS) { + int alias = FIELD_GET(PMR_ALIAS_MASK, flags); + + pair = NULL; + for (int j = 0; j < pairs; j++) { + if (alias == memory_alias_pairs[j].index) { + pair = &memory_alias_pairs[j]; + break; + } + } + if (!pair) + continue; + } else { + /* Leave room for the null sentinel. */ + if (pairs == ARRAY_SIZE(memory_alias_pairs) - 1) + continue; + pair = &memory_alias_pairs[pairs++]; + pair->index = i; + } + + /* Align the address and size with the page table PFN field. */ + base >>= PAGE_SHIFT - _PAGE_PFN_SHIFT; + size >>= PAGE_SHIFT - _PAGE_PFN_SHIFT; + + if (flags & PMA_CACHEABLE) + pair->cached_base = base; + else + pair->noncached_base = base; + pair->size = min_not_zero(pair->size, size); + } + + /* Remove any unmatched pairs. 
*/ + for (int i = 0; i < pairs; i++) { + struct memory_alias_pair *pair = &memory_alias_pairs[i]; + + if (pair->cached_base && pair->noncached_base && pair->size) + continue; + + for (int j = i + 1; j < pairs; j++) + memory_alias_pairs[j - 1] = memory_alias_pairs[j]; + memory_alias_pairs[--pairs].size = 0; + } +} +#endif /* CONFIG_RISCV_ISA_XLINUXMEMALIAS */ diff --git a/arch/riscv/mm/ptdump.c b/arch/riscv/mm/ptdump.c index 763ffde8ab5e..29a7be14cca5 100644 --- a/arch/riscv/mm/ptdump.c +++ b/arch/riscv/mm/ptdump.c @@ -140,7 +140,8 @@ static const struct prot_bits pte_bits[] = { .clear = ".", }, { #endif -#if defined(CONFIG_RISCV_ISA_SVPBMT) || defined(CONFIG_ERRATA_THEAD_MAE) +#if defined(CONFIG_RISCV_ISA_SVPBMT) || defined(CONFIG_RISCV_ISA_XLINUXMEMALIAS) || \ + defined(CONFIG_ERRATA_THEAD_MAE) .mask = _PAGE_MTMASK, .set = "MT(%s)", .clear = " .. ", @@ -216,7 +217,8 @@ static void dump_prot(struct pg_state *st) if (val) { if (pte_bits[i].mask == _PAGE_SOFT) sprintf(s, pte_bits[i].set, val >> 8); -#if defined(CONFIG_RISCV_ISA_SVPBMT) || defined(CONFIG_ERRATA_THEAD_MAE) +#if defined(CONFIG_RISCV_ISA_SVPBMT) || defined(CONFIG_RISCV_ISA_XLINUXMEMALIAS) || \ + defined(CONFIG_ERRATA_THEAD_MAE) else if (pte_bits[i].mask == _PAGE_MTMASK) { if (val == _PAGE_NOCACHE) sprintf(s, pte_bits[i].set, "NC"); -- 2.47.2 JH7100 provides a physical memory region which is a noncached alias of normal cacheable DRAM. Now that Linux can apply PMAs by selecting between aliases of a physical memory region, any page of DRAM can be marked as noncached for use with DMA, and the preallocated DMA pool is no longer needed. This allows portable kernels to boot on JH7100 boards. Signed-off-by: Samuel Holland --- Changes in v3: - Fix the entry number of the paired region in the DT - Keep the ERRATA_STARFIVE_JH7100 option but update its description Changes in v2: - Move the JH7100 DT changes from jh7100-common.dtsi to jh7100.dtsi - Keep RISCV_DMA_NONCOHERENT and RISCV_NONSTANDARD_CACHE_OPS selected arch/riscv/Kconfig.errata | 9 +++---- arch/riscv/Kconfig.socs | 2 ++ .../boot/dts/starfive/jh7100-common.dtsi | 24 ------------------- arch/riscv/boot/dts/starfive/jh7100.dtsi | 4 ++++ 4 files changed, 11 insertions(+), 28 deletions(-) diff --git a/arch/riscv/Kconfig.errata b/arch/riscv/Kconfig.errata index 46a353a266e5..be5afec66eaa 100644 --- a/arch/riscv/Kconfig.errata +++ b/arch/riscv/Kconfig.errata @@ -77,13 +77,11 @@ config ERRATA_SIFIVE_CIP_1200 If you don't know what to do here, say "Y". config ERRATA_STARFIVE_JH7100 - bool "StarFive JH7100 support" + bool "StarFive JH7100 support for old devicetrees" depends on ARCH_STARFIVE depends on !DMA_DIRECT_REMAP depends on NONPORTABLE select DMA_GLOBAL_POOL - select RISCV_DMA_NONCOHERENT - select RISCV_NONSTANDARD_CACHE_OPS select SIFIVE_CCACHE default n help @@ -93,7 +91,10 @@ config ERRATA_STARFIVE_JH7100 cache operations through the SiFive cache controller. Say "Y" if you want to support the BeagleV Starlight and/or - StarFive VisionFive V1 boards. + StarFive VisionFive V1 boards with older devicetrees that reserve + memory for DMA using a "shared-dma-pool". If your devicetree has + the "riscv,physical-memory-regions" property, you should instead + enable RISCV_ISA_XLINUXMEMALIAS and use a portable kernel. 
config ERRATA_THEAD bool "T-HEAD errata" diff --git a/arch/riscv/Kconfig.socs b/arch/riscv/Kconfig.socs index 848e7149e443..a8950206fb75 100644 --- a/arch/riscv/Kconfig.socs +++ b/arch/riscv/Kconfig.socs @@ -50,6 +50,8 @@ config SOC_STARFIVE bool "StarFive SoCs" select PINCTRL select RESET_CONTROLLER + select RISCV_DMA_NONCOHERENT + select RISCV_NONSTANDARD_CACHE_OPS select ARM_AMBA help This enables support for StarFive SoC platform hardware. diff --git a/arch/riscv/boot/dts/starfive/jh7100-common.dtsi b/arch/riscv/boot/dts/starfive/jh7100-common.dtsi index ae1a6aeb0aea..47d0cf55bfc0 100644 --- a/arch/riscv/boot/dts/starfive/jh7100-common.dtsi +++ b/arch/riscv/boot/dts/starfive/jh7100-common.dtsi @@ -42,30 +42,6 @@ led-ack { }; }; - reserved-memory { - #address-cells = <2>; - #size-cells = <2>; - ranges; - - dma-reserved@fa000000 { - reg = <0x0 0xfa000000 0x0 0x1000000>; - no-map; - }; - - linux,dma@107a000000 { - compatible = "shared-dma-pool"; - reg = <0x10 0x7a000000 0x0 0x1000000>; - no-map; - linux,dma-default; - }; - }; - - soc { - dma-ranges = <0x00 0x80000000 0x00 0x80000000 0x00 0x7a000000>, - <0x00 0xfa000000 0x10 0x7a000000 0x00 0x01000000>, - <0x00 0xfb000000 0x00 0xfb000000 0x07 0x85000000>; - }; - wifi_pwrseq: wifi-pwrseq { compatible = "mmc-pwrseq-simple"; reset-gpios = <&gpio 37 GPIO_ACTIVE_LOW>; diff --git a/arch/riscv/boot/dts/starfive/jh7100.dtsi b/arch/riscv/boot/dts/starfive/jh7100.dtsi index 7de0732b8eab..c7d7ec9ed8c9 100644 --- a/arch/riscv/boot/dts/starfive/jh7100.dtsi +++ b/arch/riscv/boot/dts/starfive/jh7100.dtsi @@ -7,11 +7,15 @@ /dts-v1/; #include #include +#include / { compatible = "starfive,jh7100"; #address-cells = <2>; #size-cells = <2>; + riscv,physical-memory-regions = + <0x00 0x80000000 0x08 0x00000000 (PMA_RWXA | PMA_NONCOHERENT_MEMORY) 0x0>, + <0x10 0x00000000 0x08 0x00000000 (PMA_RWX | PMA_NONCACHEABLE_MEMORY | PMR_ALIAS(0)) 0x0>; cpus: cpus { #address-cells = <1>; -- 2.47.2 EIC7700 provides a physical memory region which is a noncached alias of normal cacheable DRAM. Declare this alias in the devicetree so Linux can allocate noncached pages for noncoherent DMA, and M-mode firmware can protect the noncached alias with PMPs. Signed-off-by: Samuel Holland --- Changes in v3: - Fix the entry number of the paired region in the DT Changes in v2: - New patch for v2 arch/riscv/Kconfig.socs | 2 ++ arch/riscv/boot/dts/eswin/eic7700.dtsi | 5 +++++ 2 files changed, 7 insertions(+) diff --git a/arch/riscv/Kconfig.socs b/arch/riscv/Kconfig.socs index a8950206fb75..df3ed1d322fe 100644 --- a/arch/riscv/Kconfig.socs +++ b/arch/riscv/Kconfig.socs @@ -9,6 +9,8 @@ config ARCH_ANDES config ARCH_ESWIN bool "ESWIN SoCs" + select RISCV_DMA_NONCOHERENT + select RISCV_NONSTANDARD_CACHE_OPS help This enables support for ESWIN SoC platform hardware, including the ESWIN EIC7700 SoC. diff --git a/arch/riscv/boot/dts/eswin/eic7700.dtsi b/arch/riscv/boot/dts/eswin/eic7700.dtsi index c3ed93008bca..d566bca4e09e 100644 --- a/arch/riscv/boot/dts/eswin/eic7700.dtsi +++ b/arch/riscv/boot/dts/eswin/eic7700.dtsi @@ -5,9 +5,14 @@ /dts-v1/; +#include + / { #address-cells = <2>; #size-cells = <2>; + riscv,physical-memory-regions = + <0x000 0x80000000 0x00f 0x80000000 (PMA_RWXA | PMA_NONCOHERENT_MEMORY) 0x0>, + <0x0c0 0x00000000 0x010 0x00000000 (PMA_RWX | PMA_NONCACHEABLE_MEMORY | PMR_ALIAS(0)) 0x0>; cpus { #address-cells = <1>; -- 2.47.2
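For readers following the whole series: the net effect of the XLinuxMemalias machinery used by the JH7100 and EIC7700 devicetrees above is a PFN rewrite on every page table write and read. A simplified C model of the assembly helper riscv_fixup_memory_alias() is sketched below; it assumes the kernel context from arch/riscv/mm/pgtable.c and ignores the _PAGE_NAPOT handling and the non-standard register calling convention used by the real code:

/*
 * Illustrative C model only; the real implementation lives in
 * arch/riscv/mm/memory-alias.S and is reached from ALT_FIXUP_MT()
 * after the Svpbmt memory type bits have been cleared from pteval.
 */
static unsigned long fixup_memory_alias(unsigned long pteval)
{
	/* Swap and other !pte_present() entries are left untouched. */
	if (!(pteval & (_PAGE_PRESENT | _PAGE_PROT_NONE)))
		return pteval;

	/* Bases and sizes were pre-shifted into PTE PFN-field units. */
	for (struct memory_alias_pair *p = memory_alias_pairs; p->size; p++) {
		unsigned long off = pteval - p->cached_base;

		if (off < p->size)
			return p->noncached_base + off;
	}

	return pteval;
}

riscv_unfix_memory_alias() performs the inverse lookup against noncached_base and additionally sets _PAGE_NOCACHE, so the rest of the kernel only ever sees PFNs from the cacheable alias.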