Some platforms can customize the PTE soft dirty bit and make it unavailable even if the architecture allows providing the PTE resource. Signed-off-by: Chunyan Zhang --- fs/proc/task_mmu.c | 12 +++++++++++- include/linux/pgtable.h | 9 +++++++++ mm/debug_vm_pgtable.c | 9 +++++---- mm/huge_memory.c | 13 +++++++------ mm/internal.h | 2 +- mm/mremap.c | 13 +++++++------ mm/userfaultfd.c | 12 ++++++------ 7 files changed, 46 insertions(+), 24 deletions(-) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 29cca0e6d0ff..32ba2fb92975 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -1058,7 +1058,7 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma) * -Werror=unterminated-string-initialization warning * with GCC 15 */ - static const char mnemonics[BITS_PER_LONG][3] = { + static char mnemonics[BITS_PER_LONG][3] = { /* * In case if we meet a flag we don't know about. */ @@ -1129,6 +1129,11 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma) [ilog2(VM_SEALED)] = "sl", #endif }; +#ifdef CONFIG_MEM_SOFT_DIRTY + if (!pte_soft_dirty_available()) + mnemonics[ilog2(VM_SOFTDIRTY)][0] = 0; +#endif + size_t i; seq_puts(m, "VmFlags: "); @@ -1531,6 +1536,8 @@ static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr, static inline void clear_soft_dirty(struct vm_area_struct *vma, unsigned long addr, pte_t *pte) { + if (!pte_soft_dirty_available()) + return; /* * The soft-dirty tracker uses #PF-s to catch writes * to pages, so write-protect the pte as well. See the @@ -1566,6 +1573,9 @@ static inline void clear_soft_dirty_pmd(struct vm_area_struct *vma, { pmd_t old, pmd = *pmdp; + if (!pte_soft_dirty_available()) + return; + if (pmd_present(pmd)) { /* See comment in change_huge_pmd() */ old = pmdp_invalidate(vma, addr, pmdp); diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 4c035637eeb7..2a489647ac96 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1538,6 +1538,15 @@ static inline pgprot_t pgprot_modify(pgprot_t oldprot, pgprot_t newprot) #endif #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY + +/* + * Some platforms can customize the PTE soft dirty bit and make it unavailable + * even if the architecture allows providing the PTE resource. + */ +#ifndef pte_soft_dirty_available +#define pte_soft_dirty_available() (true) +#endif + #ifndef CONFIG_ARCH_ENABLE_THP_MIGRATION static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd) { diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c index 830107b6dd08..98ed7e22ccec 100644 --- a/mm/debug_vm_pgtable.c +++ b/mm/debug_vm_pgtable.c @@ -690,7 +690,7 @@ static void __init pte_soft_dirty_tests(struct pgtable_debug_args *args) { pte_t pte = pfn_pte(args->fixed_pte_pfn, args->page_prot); - if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY)) + if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) || !pte_soft_dirty_available()) return; pr_debug("Validating PTE soft dirty\n"); @@ -702,7 +702,7 @@ static void __init pte_swap_soft_dirty_tests(struct pgtable_debug_args *args) { pte_t pte; - if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY)) + if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) || !pte_soft_dirty_available()) return; pr_debug("Validating PTE swap soft dirty\n"); @@ -718,7 +718,7 @@ static void __init pmd_soft_dirty_tests(struct pgtable_debug_args *args) { pmd_t pmd; - if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY)) + if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) || !pte_soft_dirty_available()) return; if (!has_transparent_hugepage()) @@ -735,7 +735,8 @@ static void __init pmd_swap_soft_dirty_tests(struct pgtable_debug_args *args) pmd_t pmd; if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) || - !IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION)) + !pte_soft_dirty_available() || + !IS_ENABLED(CONFIG_ARCH_ENABLE_THP_MIGRATION)) return; if (!has_transparent_hugepage()) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 9c38a95e9f09..2cf001b2e950 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -2271,12 +2271,13 @@ static inline int pmd_move_must_withdraw(spinlock_t *new_pmd_ptl, static pmd_t move_soft_dirty_pmd(pmd_t pmd) { -#ifdef CONFIG_MEM_SOFT_DIRTY - if (unlikely(is_pmd_migration_entry(pmd))) - pmd = pmd_swp_mksoft_dirty(pmd); - else if (pmd_present(pmd)) - pmd = pmd_mksoft_dirty(pmd); -#endif + if (IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) && pte_soft_dirty_available()) { + if (unlikely(is_pmd_migration_entry(pmd))) + pmd = pmd_swp_mksoft_dirty(pmd); + else if (pmd_present(pmd)) + pmd = pmd_mksoft_dirty(pmd); + } + return pmd; } diff --git a/mm/internal.h b/mm/internal.h index 45b725c3dc03..8a5b20fac892 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -1538,7 +1538,7 @@ static inline bool vma_soft_dirty_enabled(struct vm_area_struct *vma) * VM_SOFTDIRTY is defined as 0x0, then !(vm_flags & VM_SOFTDIRTY) * will be constantly true. */ - if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY)) + if (!IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) || !pte_soft_dirty_available()) return false; /* diff --git a/mm/mremap.c b/mm/mremap.c index e618a706aff5..7c01320aea33 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -162,12 +162,13 @@ static pte_t move_soft_dirty_pte(pte_t pte) * Set soft dirty bit so we can notice * in userspace the ptes were moved. */ -#ifdef CONFIG_MEM_SOFT_DIRTY - if (pte_present(pte)) - pte = pte_mksoft_dirty(pte); - else if (is_swap_pte(pte)) - pte = pte_swp_mksoft_dirty(pte); -#endif + if (IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) && pte_soft_dirty_available()) { + if (pte_present(pte)) + pte = pte_mksoft_dirty(pte); + else if (is_swap_pte(pte)) + pte = pte_swp_mksoft_dirty(pte); + } + return pte; } diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 45e6290e2e8b..0e07a983c513 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -1065,9 +1065,9 @@ static int move_present_pte(struct mm_struct *mm, orig_dst_pte = folio_mk_pte(src_folio, dst_vma->vm_page_prot); /* Set soft dirty bit so userspace can notice the pte was moved */ -#ifdef CONFIG_MEM_SOFT_DIRTY - orig_dst_pte = pte_mksoft_dirty(orig_dst_pte); -#endif + if (IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) && pte_soft_dirty_available()) + orig_dst_pte = pte_mksoft_dirty(orig_dst_pte); + if (pte_dirty(orig_src_pte)) orig_dst_pte = pte_mkdirty(orig_dst_pte); orig_dst_pte = pte_mkwrite(orig_dst_pte, dst_vma); @@ -1134,9 +1134,9 @@ static int move_swap_pte(struct mm_struct *mm, struct vm_area_struct *dst_vma, } orig_src_pte = ptep_get_and_clear(mm, src_addr, src_pte); -#ifdef CONFIG_MEM_SOFT_DIRTY - orig_src_pte = pte_swp_mksoft_dirty(orig_src_pte); -#endif + if (IS_ENABLED(CONFIG_MEM_SOFT_DIRTY) && pte_soft_dirty_available()) + orig_src_pte = pte_swp_mksoft_dirty(orig_src_pte); + set_pte_at(mm, dst_addr, dst_pte, orig_src_pte); double_pt_unlock(dst_ptl, src_ptl); -- 2.34.1 Some platforms can customize the PTE uffd_wp bit and make it unavailable even if the architecture allows providing the PTE resource. This patch adds a macro API which allows architectures to define their specific one for checking if the PTE uffd_wp bit is available. Signed-off-by: Chunyan Zhang --- fs/userfaultfd.c | 25 +++++++++-------- include/asm-generic/pgtable_uffd.h | 12 ++++++++ include/linux/mm_inline.h | 6 ++-- include/linux/pgtable.h | 1 + include/linux/userfaultfd_k.h | 44 +++++++++++++++++++----------- mm/memory.c | 6 ++-- 6 files changed, 63 insertions(+), 31 deletions(-) diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 54c6cc7fe9c6..68e5006e5158 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -1270,9 +1270,10 @@ static int userfaultfd_register(struct userfaultfd_ctx *ctx, if (uffdio_register.mode & UFFDIO_REGISTER_MODE_MISSING) vm_flags |= VM_UFFD_MISSING; if (uffdio_register.mode & UFFDIO_REGISTER_MODE_WP) { -#ifndef CONFIG_HAVE_ARCH_USERFAULTFD_WP - goto out; -#endif + if (!IS_ENABLED(CONFIG_HAVE_ARCH_USERFAULTFD_WP) || + !pte_uffd_wp_available()) + goto out; + vm_flags |= VM_UFFD_WP; } if (uffdio_register.mode & UFFDIO_REGISTER_MODE_MINOR) { @@ -1980,14 +1981,16 @@ static int userfaultfd_api(struct userfaultfd_ctx *ctx, uffdio_api.features &= ~(UFFD_FEATURE_MINOR_HUGETLBFS | UFFD_FEATURE_MINOR_SHMEM); #endif -#ifndef CONFIG_HAVE_ARCH_USERFAULTFD_WP - uffdio_api.features &= ~UFFD_FEATURE_PAGEFAULT_FLAG_WP; -#endif -#ifndef CONFIG_PTE_MARKER_UFFD_WP - uffdio_api.features &= ~UFFD_FEATURE_WP_HUGETLBFS_SHMEM; - uffdio_api.features &= ~UFFD_FEATURE_WP_UNPOPULATED; - uffdio_api.features &= ~UFFD_FEATURE_WP_ASYNC; -#endif + if (!IS_ENABLED(CONFIG_HAVE_ARCH_USERFAULTFD_WP) || + !pte_uffd_wp_available()) + uffdio_api.features &= ~UFFD_FEATURE_PAGEFAULT_FLAG_WP; + + if (!IS_ENABLED(CONFIG_PTE_MARKER_UFFD_WP) || + !pte_uffd_wp_available()) { + uffdio_api.features &= ~UFFD_FEATURE_WP_HUGETLBFS_SHMEM; + uffdio_api.features &= ~UFFD_FEATURE_WP_UNPOPULATED; + uffdio_api.features &= ~UFFD_FEATURE_WP_ASYNC; + } ret = -EINVAL; if (features & ~uffdio_api.features) diff --git a/include/asm-generic/pgtable_uffd.h b/include/asm-generic/pgtable_uffd.h index 828966d4c281..b86a5ff447da 100644 --- a/include/asm-generic/pgtable_uffd.h +++ b/include/asm-generic/pgtable_uffd.h @@ -61,6 +61,18 @@ static inline pmd_t pmd_swp_clear_uffd_wp(pmd_t pmd) { return pmd; } +#define pte_uffd_wp_available() (false) +#else +/* + * Some platforms can customize the PTE uffd_wp bit and make it unavailable + * even if the architecture allows providing the PTE resource. + * This allows architectures to define their own API for checking if + * the PTE uffd_wp bit is available. + */ +#ifndef pte_uffd_wp_available +#define pte_uffd_wp_available() (true) +#endif + #endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ #endif /* _ASM_GENERIC_PGTABLE_UFFD_H */ diff --git a/include/linux/mm_inline.h b/include/linux/mm_inline.h index 89b518ff097e..a81055bb3f87 100644 --- a/include/linux/mm_inline.h +++ b/include/linux/mm_inline.h @@ -570,7 +570,9 @@ static inline bool pte_install_uffd_wp_if_needed(struct vm_area_struct *vma, unsigned long addr, pte_t *pte, pte_t pteval) { -#ifdef CONFIG_PTE_MARKER_UFFD_WP + if (!IS_ENABLED(CONFIG_PTE_MARKER_UFFD_WP) || !pte_uffd_wp_available()) + return false; + bool arm_uffd_pte = false; /* The current status of the pte should be "cleared" before calling */ @@ -601,7 +603,7 @@ pte_install_uffd_wp_if_needed(struct vm_area_struct *vma, unsigned long addr, make_pte_marker(PTE_MARKER_UFFD_WP)); return true; } -#endif + return false; } diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h index 2a489647ac96..51f5b610c5ec 100644 --- a/include/linux/pgtable.h +++ b/include/linux/pgtable.h @@ -1564,6 +1564,7 @@ static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd) } #endif #else /* !CONFIG_HAVE_ARCH_SOFT_DIRTY */ +#define pte_soft_dirty_available() (false) static inline int pte_soft_dirty(pte_t pte) { return 0; diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index c0e716aec26a..ec4a815286c8 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -228,15 +228,15 @@ static inline bool vma_can_userfault(struct vm_area_struct *vma, if (wp_async && (vm_flags == VM_UFFD_WP)) return true; -#ifndef CONFIG_PTE_MARKER_UFFD_WP /* * If user requested uffd-wp but not enabled pte markers for * uffd-wp, then shmem & hugetlbfs are not supported but only * anonymous. */ - if ((vm_flags & VM_UFFD_WP) && !vma_is_anonymous(vma)) + if ((!IS_ENABLED(CONFIG_PTE_MARKER_UFFD_WP) || + !pte_uffd_wp_available()) && + (vm_flags & VM_UFFD_WP) && !vma_is_anonymous(vma)) return false; -#endif /* By default, allow any of anon|shmem|hugetlb */ return vma_is_anonymous(vma) || is_vm_hugetlb_page(vma) || @@ -437,8 +437,11 @@ static inline bool userfaultfd_wp_use_markers(struct vm_area_struct *vma) static inline bool pte_marker_entry_uffd_wp(swp_entry_t entry) { #ifdef CONFIG_PTE_MARKER_UFFD_WP - return is_pte_marker_entry(entry) && - (pte_marker_get(entry) & PTE_MARKER_UFFD_WP); + if (pte_uffd_wp_available()) + return is_pte_marker_entry(entry) && + (pte_marker_get(entry) & PTE_MARKER_UFFD_WP); + else + return false; #else return false; #endif @@ -447,14 +450,19 @@ static inline bool pte_marker_entry_uffd_wp(swp_entry_t entry) static inline bool pte_marker_uffd_wp(pte_t pte) { #ifdef CONFIG_PTE_MARKER_UFFD_WP - swp_entry_t entry; + if (pte_uffd_wp_available()) { + swp_entry_t entry; - if (!is_swap_pte(pte)) - return false; + if (!is_swap_pte(pte)) + return false; - entry = pte_to_swp_entry(pte); + entry = pte_to_swp_entry(pte); + + return pte_marker_entry_uffd_wp(entry); + } else { + return false; + } - return pte_marker_entry_uffd_wp(entry); #else return false; #endif @@ -467,14 +475,18 @@ static inline bool pte_marker_uffd_wp(pte_t pte) static inline bool pte_swp_uffd_wp_any(pte_t pte) { #ifdef CONFIG_PTE_MARKER_UFFD_WP - if (!is_swap_pte(pte)) - return false; + if (pte_uffd_wp_available()) { + if (!is_swap_pte(pte)) + return false; - if (pte_swp_uffd_wp(pte)) - return true; + if (pte_swp_uffd_wp(pte)) + return true; - if (pte_marker_uffd_wp(pte)) - return true; + if (pte_marker_uffd_wp(pte)) + return true; + } else { + return false; + } #endif return false; } diff --git a/mm/memory.c b/mm/memory.c index 0ba4f6b71847..1c61b2d7bd4d 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1465,7 +1465,9 @@ zap_install_uffd_wp_if_needed(struct vm_area_struct *vma, { bool was_installed = false; -#ifdef CONFIG_PTE_MARKER_UFFD_WP + if (!IS_ENABLED(CONFIG_PTE_MARKER_UFFD_WP) || !pte_uffd_wp_available()) + return was_installed; + /* Zap on anonymous always means dropping everything */ if (vma_is_anonymous(vma)) return false; @@ -1482,7 +1484,7 @@ zap_install_uffd_wp_if_needed(struct vm_area_struct *vma, pte++; addr += PAGE_SIZE; } -#endif + return was_installed; } -- 2.34.1 The Svrsw60t59b extension allows to free the PTE reserved bits 60 and 59 for software to use. Reviewed-by: Alexandre Ghiti Signed-off-by: Chunyan Zhang --- arch/riscv/Kconfig | 14 ++++++++++++++ arch/riscv/include/asm/hwcap.h | 1 + arch/riscv/kernel/cpufeature.c | 1 + 3 files changed, 16 insertions(+) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index a4b233a0659e..d99df67cc7a4 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -862,6 +862,20 @@ config RISCV_ISA_ZICBOP If you don't know what to do here, say Y. +config RISCV_ISA_SVRSW60T59B + bool "Svrsw60t59b extension support for using PTE bits 60 and 59" + depends on MMU && 64BIT + depends on RISCV_ALTERNATIVE + default y + help + Adds support to dynamically detect the presence of the Svrsw60t59b + extension and enable its usage. + + The Svrsw60t59b extension allows to free the PTE reserved bits 60 + and 59 for software to use. + + If you don't know what to do here, say Y. + config TOOLCHAIN_NEEDS_EXPLICIT_ZICSR_ZIFENCEI def_bool y # https://sourceware.org/git/?p=binutils-gdb.git;a=commit;h=aed44286efa8ae8717a77d94b51ac3614e2ca6dc diff --git a/arch/riscv/include/asm/hwcap.h b/arch/riscv/include/asm/hwcap.h index affd63e11b0a..f98fcb5c17d5 100644 --- a/arch/riscv/include/asm/hwcap.h +++ b/arch/riscv/include/asm/hwcap.h @@ -106,6 +106,7 @@ #define RISCV_ISA_EXT_ZAAMO 97 #define RISCV_ISA_EXT_ZALRSC 98 #define RISCV_ISA_EXT_ZICBOP 99 +#define RISCV_ISA_EXT_SVRSW60T59B 100 #define RISCV_ISA_EXT_XLINUXENVCFG 127 diff --git a/arch/riscv/kernel/cpufeature.c b/arch/riscv/kernel/cpufeature.c index 743d53415572..de29562096ff 100644 --- a/arch/riscv/kernel/cpufeature.c +++ b/arch/riscv/kernel/cpufeature.c @@ -540,6 +540,7 @@ const struct riscv_isa_ext_data riscv_isa_ext[] = { __RISCV_ISA_EXT_DATA(svnapot, RISCV_ISA_EXT_SVNAPOT), __RISCV_ISA_EXT_DATA(svpbmt, RISCV_ISA_EXT_SVPBMT), __RISCV_ISA_EXT_DATA(svvptc, RISCV_ISA_EXT_SVVPTC), + __RISCV_ISA_EXT_DATA(svrsw60t59b, RISCV_ISA_EXT_SVRSW60T59B), }; const size_t riscv_isa_ext_count = ARRAY_SIZE(riscv_isa_ext); -- 2.34.1 The Svrsw60t59b extension allows to free the PTE reserved bits 60 and 59 for software, this patch uses bit 59 for soft-dirty. To add swap PTE soft-dirty tracking, we borrow bit 3 which is available for swap PTEs on RISC-V systems. Signed-off-by: Chunyan Zhang --- arch/riscv/Kconfig | 1 + arch/riscv/include/asm/pgtable-bits.h | 19 +++++++ arch/riscv/include/asm/pgtable.h | 73 ++++++++++++++++++++++++++- 3 files changed, 91 insertions(+), 2 deletions(-) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index d99df67cc7a4..53b73e4bdf3f 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -141,6 +141,7 @@ config RISCV select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT select HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET select HAVE_ARCH_SECCOMP_FILTER + select HAVE_ARCH_SOFT_DIRTY if 64BIT && MMU && RISCV_ISA_SVRSW60T59B select HAVE_ARCH_THREAD_STRUCT_WHITELIST select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE if 64BIT && MMU diff --git a/arch/riscv/include/asm/pgtable-bits.h b/arch/riscv/include/asm/pgtable-bits.h index 179bd4afece4..8ffe81bf66d2 100644 --- a/arch/riscv/include/asm/pgtable-bits.h +++ b/arch/riscv/include/asm/pgtable-bits.h @@ -19,6 +19,25 @@ #define _PAGE_SOFT (3 << 8) /* Reserved for software */ #define _PAGE_SPECIAL (1 << 8) /* RSW: 0x1 */ + +#ifdef CONFIG_MEM_SOFT_DIRTY + +/* ext_svrsw60t59b: bit 59 for software dirty tracking */ +#define _PAGE_SOFT_DIRTY \ + ((riscv_has_extension_unlikely(RISCV_ISA_EXT_SVRSW60T59B)) ? \ + (1UL << 59) : 0) +/* + * Bit 3 is always zero for swap entry computation, so we + * can borrow it for swap page soft-dirty tracking. + */ +#define _PAGE_SWP_SOFT_DIRTY \ + ((riscv_has_extension_unlikely(RISCV_ISA_EXT_SVRSW60T59B)) ? \ + _PAGE_EXEC : 0) +#else +#define _PAGE_SOFT_DIRTY 0 +#define _PAGE_SWP_SOFT_DIRTY 0 +#endif /* CONFIG_MEM_SOFT_DIRTY */ + #define _PAGE_TABLE _PAGE_PRESENT /* diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index 91697fbf1f90..b2d00d129d81 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -427,7 +427,7 @@ static inline pte_t pte_mkwrite_novma(pte_t pte) static inline pte_t pte_mkdirty(pte_t pte) { - return __pte(pte_val(pte) | _PAGE_DIRTY); + return __pte(pte_val(pte) | _PAGE_DIRTY | _PAGE_SOFT_DIRTY); } static inline pte_t pte_mkclean(pte_t pte) @@ -455,6 +455,40 @@ static inline pte_t pte_mkhuge(pte_t pte) return pte; } +#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY +#define pte_soft_dirty_available() riscv_has_extension_unlikely(RISCV_ISA_EXT_SVRSW60T59B) + +static inline bool pte_soft_dirty(pte_t pte) +{ + return !!(pte_val(pte) & _PAGE_SOFT_DIRTY); +} + +static inline pte_t pte_mksoft_dirty(pte_t pte) +{ + return __pte(pte_val(pte) | _PAGE_SOFT_DIRTY); +} + +static inline pte_t pte_clear_soft_dirty(pte_t pte) +{ + return __pte(pte_val(pte) & ~(_PAGE_SOFT_DIRTY)); +} + +static inline bool pte_swp_soft_dirty(pte_t pte) +{ + return !!(pte_val(pte) & _PAGE_SWP_SOFT_DIRTY); +} + +static inline pte_t pte_swp_mksoft_dirty(pte_t pte) +{ + return __pte(pte_val(pte) | _PAGE_SWP_SOFT_DIRTY); +} + +static inline pte_t pte_swp_clear_soft_dirty(pte_t pte) +{ + return __pte(pte_val(pte) & ~(_PAGE_SWP_SOFT_DIRTY)); +} +#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */ + #ifdef CONFIG_RISCV_ISA_SVNAPOT #define pte_leaf_size(pte) (pte_napot(pte) ? \ napot_cont_size(napot_cont_order(pte)) :\ @@ -802,6 +836,40 @@ static inline pud_t pud_mkspecial(pud_t pud) } #endif +#ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY +static inline bool pmd_soft_dirty(pmd_t pmd) +{ + return pte_soft_dirty(pmd_pte(pmd)); +} + +static inline pmd_t pmd_mksoft_dirty(pmd_t pmd) +{ + return pte_pmd(pte_mksoft_dirty(pmd_pte(pmd))); +} + +static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd) +{ + return pte_pmd(pte_clear_soft_dirty(pmd_pte(pmd))); +} + +#ifdef CONFIG_ARCH_ENABLE_THP_MIGRATION +static inline bool pmd_swp_soft_dirty(pmd_t pmd) +{ + return pte_swp_soft_dirty(pmd_pte(pmd)); +} + +static inline pmd_t pmd_swp_mksoft_dirty(pmd_t pmd) +{ + return pte_pmd(pte_swp_mksoft_dirty(pmd_pte(pmd))); +} + +static inline pmd_t pmd_swp_clear_soft_dirty(pmd_t pmd) +{ + return pte_pmd(pte_swp_clear_soft_dirty(pmd_pte(pmd))); +} +#endif /* CONFIG_ARCH_ENABLE_THP_MIGRATION */ +#endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */ + static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t pmd) { @@ -983,7 +1051,8 @@ static inline pud_t pud_modify(pud_t pud, pgprot_t newprot) * * Format of swap PTE: * bit 0: _PAGE_PRESENT (zero) - * bit 1 to 3: _PAGE_LEAF (zero) + * bit 1 to 2: (zero) + * bit 3: _PAGE_SWP_SOFT_DIRTY * bit 5: _PAGE_PROT_NONE (zero) * bit 6: exclusive marker * bits 7 to 11: swap type -- 2.34.1 The Svrsw60t59b extension allows to free the PTE reserved bits 60 and 59 for software, this patch uses bit 60 for uffd-wp tracking Additionally for tracking the uffd-wp state as a PTE swap bit, we borrow bit 4 which is not involved into swap entry computation. Signed-off-by: Chunyan Zhang --- arch/riscv/Kconfig | 1 + arch/riscv/include/asm/pgtable-bits.h | 18 +++++++ arch/riscv/include/asm/pgtable.h | 67 +++++++++++++++++++++++++++ 3 files changed, 86 insertions(+) diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 53b73e4bdf3f..f928768bb14a 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -147,6 +147,7 @@ config RISCV select HAVE_ARCH_TRANSPARENT_HUGEPAGE if 64BIT && MMU select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD if 64BIT && MMU select HAVE_ARCH_USERFAULTFD_MINOR if 64BIT && USERFAULTFD + select HAVE_ARCH_USERFAULTFD_WP if 64BIT && MMU && USERFAULTFD && RISCV_ISA_SVRSW60T59B select HAVE_ARCH_VMAP_STACK if MMU && 64BIT select HAVE_ASM_MODVERSIONS select HAVE_CONTEXT_TRACKING_USER diff --git a/arch/riscv/include/asm/pgtable-bits.h b/arch/riscv/include/asm/pgtable-bits.h index 8ffe81bf66d2..894b2a24fc49 100644 --- a/arch/riscv/include/asm/pgtable-bits.h +++ b/arch/riscv/include/asm/pgtable-bits.h @@ -38,6 +38,24 @@ #define _PAGE_SWP_SOFT_DIRTY 0 #endif /* CONFIG_MEM_SOFT_DIRTY */ +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP + +/* ext_svrsw60t59b: Bit(60) for uffd-wp tracking */ +#define _PAGE_UFFD_WP \ + ((riscv_has_extension_unlikely(RISCV_ISA_EXT_SVRSW60T59B)) ? \ + (1UL << 60) : 0) +/* + * Bit 4 is not involved into swap entry computation, so we + * can borrow it for swap page uffd-wp tracking. + */ +#define _PAGE_SWP_UFFD_WP \ + ((riscv_has_extension_unlikely(RISCV_ISA_EXT_SVRSW60T59B)) ? \ + _PAGE_USER : 0) +#else +#define _PAGE_UFFD_WP 0 +#define _PAGE_SWP_UFFD_WP 0 +#endif + #define _PAGE_TABLE _PAGE_PRESENT /* diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h index b2d00d129d81..94cc97d3dbff 100644 --- a/arch/riscv/include/asm/pgtable.h +++ b/arch/riscv/include/asm/pgtable.h @@ -416,6 +416,40 @@ static inline pte_t pte_wrprotect(pte_t pte) return __pte(pte_val(pte) & ~(_PAGE_WRITE)); } +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP +#define pte_uffd_wp_available() riscv_has_extension_unlikely(RISCV_ISA_EXT_SVRSW60T59B) + +static inline bool pte_uffd_wp(pte_t pte) +{ + return !!(pte_val(pte) & _PAGE_UFFD_WP); +} + +static inline pte_t pte_mkuffd_wp(pte_t pte) +{ + return pte_wrprotect(__pte(pte_val(pte) | _PAGE_UFFD_WP)); +} + +static inline pte_t pte_clear_uffd_wp(pte_t pte) +{ + return __pte(pte_val(pte) & ~(_PAGE_UFFD_WP)); +} + +static inline bool pte_swp_uffd_wp(pte_t pte) +{ + return !!(pte_val(pte) & _PAGE_SWP_UFFD_WP); +} + +static inline pte_t pte_swp_mkuffd_wp(pte_t pte) +{ + return __pte(pte_val(pte) | _PAGE_SWP_UFFD_WP); +} + +static inline pte_t pte_swp_clear_uffd_wp(pte_t pte) +{ + return __pte(pte_val(pte) & ~(_PAGE_SWP_UFFD_WP)); +} +#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ + /* static inline pte_t pte_mkread(pte_t pte) */ static inline pte_t pte_mkwrite_novma(pte_t pte) @@ -836,6 +870,38 @@ static inline pud_t pud_mkspecial(pud_t pud) } #endif +#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_WP +static inline bool pmd_uffd_wp(pmd_t pmd) +{ + return pte_uffd_wp(pmd_pte(pmd)); +} + +static inline pmd_t pmd_mkuffd_wp(pmd_t pmd) +{ + return pte_pmd(pte_mkuffd_wp(pmd_pte(pmd))); +} + +static inline pmd_t pmd_clear_uffd_wp(pmd_t pmd) +{ + return pte_pmd(pte_clear_uffd_wp(pmd_pte(pmd))); +} + +static inline bool pmd_swp_uffd_wp(pmd_t pmd) +{ + return pte_swp_uffd_wp(pmd_pte(pmd)); +} + +static inline pmd_t pmd_swp_mkuffd_wp(pmd_t pmd) +{ + return pte_pmd(pte_swp_mkuffd_wp(pmd_pte(pmd))); +} + +static inline pmd_t pmd_swp_clear_uffd_wp(pmd_t pmd) +{ + return pte_pmd(pte_swp_clear_uffd_wp(pmd_pte(pmd))); +} +#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_WP */ + #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY static inline bool pmd_soft_dirty(pmd_t pmd) { @@ -1053,6 +1119,7 @@ static inline pud_t pud_modify(pud_t pud, pgprot_t newprot) * bit 0: _PAGE_PRESENT (zero) * bit 1 to 2: (zero) * bit 3: _PAGE_SWP_SOFT_DIRTY + * bit 4: _PAGE_SWP_UFFD_WP * bit 5: _PAGE_PROT_NONE (zero) * bit 6: exclusive marker * bits 7 to 11: swap type -- 2.34.1