In order to lay the groundwork for VMA flags becoming a bitmap rather
than a value the size of a system word, we need to be able to
consistently refer to VMA flags by bit number rather than by value.

Take this opportunity to do so in an enum, which is additionally useful
for tooling to extract metadata from. This also makes it very clear at a
glance which bits are being used for what.

We use the VMA_ prefix for the bit values, as it is logical to do so
since these reference VMAs, and we consistently suffix with _BIT to make
it clear what the values refer to. We place all bits 32+ in an #ifdef
CONFIG_64BIT block, as these all require a 64-bit system, and it is
neater and self-documenting to do so.
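Note that the flag values themselves are unchanged by this; the
correspondence between the old literal values and the new VMA_BIT()
definitions could be spot-checked at build time along the lines of the
following (an illustrative sketch only, not part of this patch):

	/* Hypothetical build-time checks: VMA_BIT() reproduces the old values. */
	static_assert(VM_READ == 0x00000001);
	static_assert(VM_LOCKED == 0x00002000);
	static_assert(VM_MERGEABLE == BIT(31));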
We declare a sparse-bitwise type, vma_flag_t, which ensures that users
cannot pass around invalid VMA flags by accident, and which prepares for
future work towards VMA flags becoming a bitmap, where we want to ensure
that bit values are type safe.
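As an illustration of the type safety (a sketch only - the helper below
is hypothetical and not part of this patch), sparse will warn when a
plain integer is passed where a vma_flag_t is expected:

	/* Hypothetical helper: sparse warns if given a plain integer. */
	static inline bool vma_flag_example(vma_flag_t bit, unsigned long flags)
	{
		return flags & VMA_BIT(bit);
	}

	vma_flag_example(VMA_LOCKED_BIT, flags);	/* OK */
	vma_flag_example(13, flags);			/* sparse warning */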
Finally, we have to update some rather silly if-deffery found in
fs/proc/task_mmu.c which would otherwise break. Additionally, update the
VMA userland testing vma_internal.h header to include these changes.

Signed-off-by: Lorenzo Stoakes
---
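A note on the fs/proc/task_mmu.c change (not intended for the commit
message): VM_PKEY_BIT3 and VM_PKEY_BIT4 now expand to VMA_BIT() of an
enum constant, and enum values are not visible to the preprocessor, so:

	#if VM_PKEY_BIT3		/* no longer evaluable by cpp */

would break the build, and is therefore replaced by:

	#if CONFIG_ARCH_PKEY_BITS > 3	/* plain cpp-visible constant */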
 fs/proc/task_mmu.c               |   4 +-
 include/linux/mm.h               | 286 +++++++++++++++++---------
 tools/testing/vma/vma_internal.h | 341 ++++++++++++++++++++++++++++----
 3 files changed, 488 insertions(+), 143 deletions(-)

diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index db16ed91c269..c113a3eb5cbd 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -1182,10 +1182,10 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
 		[ilog2(VM_PKEY_BIT0)] = "",
 		[ilog2(VM_PKEY_BIT1)] = "",
 		[ilog2(VM_PKEY_BIT2)] = "",
-#if VM_PKEY_BIT3
+#if CONFIG_ARCH_PKEY_BITS > 3
 		[ilog2(VM_PKEY_BIT3)] = "",
 #endif
-#if VM_PKEY_BIT4
+#if CONFIG_ARCH_PKEY_BITS > 4
 		[ilog2(VM_PKEY_BIT4)] = "",
 #endif
 #endif /* CONFIG_ARCH_HAS_PKEYS */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index a8811ba57150..bb0d8a1d1d73 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -271,94 +271,172 @@ extern struct rw_semaphore nommu_region_sem;
 extern unsigned int kobjsize(const void *objp);
 #endif
 
+/**
+ * vma_flag_t - specifies an individual VMA flag by bit number.
+ *
+ * This value is made type safe by sparse to avoid passing invalid flag values
+ * around.
+ */
+typedef int __bitwise vma_flag_t;
+
+enum {
+	/* currently active flags */
+	VMA_READ_BIT = (__force vma_flag_t)0,
+	VMA_WRITE_BIT = (__force vma_flag_t)1,
+	VMA_EXEC_BIT = (__force vma_flag_t)2,
+	VMA_SHARED_BIT = (__force vma_flag_t)3,
+
+	/* mprotect() hardcodes VM_MAYREAD >> 4 == VM_READ, and so for r/w/x bits. */
+	VMA_MAYREAD_BIT = (__force vma_flag_t)4,	/* limits for mprotect() etc */
+	VMA_MAYWRITE_BIT = (__force vma_flag_t)5,
+	VMA_MAYEXEC_BIT = (__force vma_flag_t)6,
+	VMA_MAYSHARE_BIT = (__force vma_flag_t)7,
+
+	VMA_GROWSDOWN_BIT = (__force vma_flag_t)8,	/* general info on the segment */
+#ifdef CONFIG_MMU
+	VMA_UFFD_MISSING_BIT = (__force vma_flag_t)9,	/* missing pages tracking */
+#else
+	/* nommu: R/O MAP_PRIVATE mapping that might overlay a file mapping */
+	VMA_MAYOVERLAY_BIT = (__force vma_flag_t)9,
+#endif
+	/* Page-ranges managed without "struct page", just pure PFN */
+	VMA_PFNMAP_BIT = (__force vma_flag_t)10,
+
+	VMA_MAYBE_GUARD_BIT = (__force vma_flag_t)11,
+
+	VMA_UFFD_WP_BIT = (__force vma_flag_t)12,	/* wrprotect pages tracking */
+
+	VMA_LOCKED_BIT = (__force vma_flag_t)13,
+	VMA_IO_BIT = (__force vma_flag_t)14,		/* Memory mapped I/O or similar */
+
+	/* Used by madvise() */
+	VMA_SEQ_READ_BIT = (__force vma_flag_t)15,	/* App will access data sequentially */
+	VMA_RAND_READ_BIT = (__force vma_flag_t)16,	/* App will not benefit from clustered reads */
+
+	VMA_DONTCOPY_BIT = (__force vma_flag_t)17,	/* Do not copy this vma on fork */
+	VMA_DONTEXPAND_BIT = (__force vma_flag_t)18,	/* Cannot expand with mremap() */
+	VMA_LOCKONFAULT_BIT = (__force vma_flag_t)19,	/* Lock pages covered when faulted in */
+	VMA_ACCOUNT_BIT = (__force vma_flag_t)20,	/* Is a VM accounted object */
+	VMA_NORESERVE_BIT = (__force vma_flag_t)21,	/* should the VM suppress accounting */
+	VMA_HUGETLB_BIT = (__force vma_flag_t)22,	/* Huge TLB Page VM */
+	VMA_SYNC_BIT = (__force vma_flag_t)23,		/* Synchronous page faults */
+	VMA_ARCH_1_BIT = (__force vma_flag_t)24,	/* Architecture-specific flag */
+	VMA_WIPEONFORK_BIT = (__force vma_flag_t)25,	/* Wipe VMA contents in child. */
+	VMA_DONTDUMP_BIT = (__force vma_flag_t)26,	/* Do not include in the core dump */
+
+#ifdef CONFIG_MEM_SOFT_DIRTY
+	VMA_SOFTDIRTY_BIT = (__force vma_flag_t)27,	/* Not soft dirty clean area */
+#endif
+
+	VMA_MIXEDMAP_BIT = (__force vma_flag_t)28,	/* Can contain struct page and pure PFN pages */
+	VMA_HUGEPAGE_BIT = (__force vma_flag_t)29,	/* MADV_HUGEPAGE marked this vma */
+	VMA_NOHUGEPAGE_BIT = (__force vma_flag_t)30,	/* MADV_NOHUGEPAGE marked this vma */
+	VMA_MERGEABLE_BIT = (__force vma_flag_t)31,	/* KSM may merge identical pages */
+
+#ifdef CONFIG_64BIT
+	/* These bits are reused, we define specific uses below. */
+#ifdef CONFIG_ARCH_USES_HIGH_VMA_FLAGS
+	VMA_HIGH_ARCH_0_BIT = (__force vma_flag_t)32,
+	VMA_HIGH_ARCH_1_BIT = (__force vma_flag_t)33,
+	VMA_HIGH_ARCH_2_BIT = (__force vma_flag_t)34,
+	VMA_HIGH_ARCH_3_BIT = (__force vma_flag_t)35,
+	VMA_HIGH_ARCH_4_BIT = (__force vma_flag_t)36,
+	VMA_HIGH_ARCH_5_BIT = (__force vma_flag_t)37,
+	VMA_HIGH_ARCH_6_BIT = (__force vma_flag_t)38,
+#endif
+
+	VMA_ALLOW_ANY_UNCACHED_BIT = (__force vma_flag_t)39,
+	VMA_DROPPABLE_BIT = (__force vma_flag_t)40,
+
+#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR
+	VMA_UFFD_MINOR_BIT = (__force vma_flag_t)41,
+#endif
+
+	VMA_SEALED_BIT = (__force vma_flag_t)42,
+#endif /* CONFIG_64BIT */
+};
+
+#define VMA_BIT(bit) BIT((__force int)bit)
+
 /*
  * vm_flags in vm_area_struct, see mm_types.h.
  * When changing, update also include/trace/events/mmflags.h
  */
 #define VM_NONE		0x00000000
 
-#define VM_READ		0x00000001	/* currently active flags */
-#define VM_WRITE	0x00000002
-#define VM_EXEC		0x00000004
-#define VM_SHARED	0x00000008
+#define VM_READ VMA_BIT(VMA_READ_BIT)
+#define VM_WRITE VMA_BIT(VMA_WRITE_BIT)
+#define VM_EXEC VMA_BIT(VMA_EXEC_BIT)
+#define VM_SHARED VMA_BIT(VMA_SHARED_BIT)
 
-/* mprotect() hardcodes VM_MAYREAD >> 4 == VM_READ, and so for r/w/x bits. */
-#define VM_MAYREAD	0x00000010	/* limits for mprotect() etc */
-#define VM_MAYWRITE	0x00000020
-#define VM_MAYEXEC	0x00000040
-#define VM_MAYSHARE	0x00000080
+#define VM_MAYREAD VMA_BIT(VMA_MAYREAD_BIT)
+#define VM_MAYWRITE VMA_BIT(VMA_MAYWRITE_BIT)
+#define VM_MAYEXEC VMA_BIT(VMA_MAYEXEC_BIT)
+#define VM_MAYSHARE VMA_BIT(VMA_MAYSHARE_BIT)
+
+#define VM_GROWSDOWN VMA_BIT(VMA_GROWSDOWN_BIT)
 
-#define VM_GROWSDOWN	0x00000100	/* general info on the segment */
 #ifdef CONFIG_MMU
-#define VM_UFFD_MISSING	0x00000200	/* missing pages tracking */
+#define VM_UFFD_MISSING VMA_BIT(VMA_UFFD_MISSING_BIT)
 #else /* CONFIG_MMU */
-#define VM_MAYOVERLAY	0x00000200	/* nommu: R/O MAP_PRIVATE mapping that might overlay a file mapping */
 #define VM_UFFD_MISSING	0
-#endif /* CONFIG_MMU */
-#define VM_PFNMAP	0x00000400	/* Page-ranges managed without "struct page", just pure PFN */
-#define VM_MAYBE_GUARD	0x00000800	/* The VMA maybe contains guard regions. */
-#define VM_UFFD_WP	0x00001000	/* wrprotect pages tracking */
-
-#define VM_LOCKED	0x00002000
-#define VM_IO		0x00004000	/* Memory mapped I/O or similar */
-
-	/* Used by sys_madvise() */
-#define VM_SEQ_READ	0x00008000	/* App will access data sequentially */
-#define VM_RAND_READ	0x00010000	/* App will not benefit from clustered reads */
-
-#define VM_DONTCOPY	0x00020000	/* Do not copy this vma on fork */
-#define VM_DONTEXPAND	0x00040000	/* Cannot expand with mremap() */
-#define VM_LOCKONFAULT	0x00080000	/* Lock the pages covered when they are faulted in */
-#define VM_ACCOUNT	0x00100000	/* Is a VM accounted object */
-#define VM_NORESERVE	0x00200000	/* should the VM suppress accounting */
-#define VM_HUGETLB	0x00400000	/* Huge TLB Page VM */
-#define VM_SYNC		0x00800000	/* Synchronous page faults */
-#define VM_ARCH_1	0x01000000	/* Architecture-specific flag */
-#define VM_WIPEONFORK	0x02000000	/* Wipe VMA contents in child. */
-#define VM_DONTDUMP	0x04000000	/* Do not include in the core dump */
+#endif
+
+#define VM_PFNMAP VMA_BIT(VMA_PFNMAP_BIT)
+
+#define VM_MAYBE_GUARD VMA_BIT(VMA_MAYBE_GUARD_BIT)
+
+#define VM_UFFD_WP VMA_BIT(VMA_UFFD_WP_BIT)
+
+#define VM_LOCKED VMA_BIT(VMA_LOCKED_BIT)
+#define VM_IO VMA_BIT(VMA_IO_BIT)
+
+#define VM_SEQ_READ VMA_BIT(VMA_SEQ_READ_BIT)
+#define VM_RAND_READ VMA_BIT(VMA_RAND_READ_BIT)
+
+#define VM_DONTCOPY VMA_BIT(VMA_DONTCOPY_BIT)
+#define VM_DONTEXPAND VMA_BIT(VMA_DONTEXPAND_BIT)
+#define VM_LOCKONFAULT VMA_BIT(VMA_LOCKONFAULT_BIT)
+#define VM_ACCOUNT VMA_BIT(VMA_ACCOUNT_BIT)
+#define VM_NORESERVE VMA_BIT(VMA_NORESERVE_BIT)
+#define VM_HUGETLB VMA_BIT(VMA_HUGETLB_BIT)
+#define VM_SYNC VMA_BIT(VMA_SYNC_BIT)
+#define VM_ARCH_1 VMA_BIT(VMA_ARCH_1_BIT)
+#define VM_WIPEONFORK VMA_BIT(VMA_WIPEONFORK_BIT)
+#define VM_DONTDUMP VMA_BIT(VMA_DONTDUMP_BIT)
 
 #ifdef CONFIG_MEM_SOFT_DIRTY
-# define VM_SOFTDIRTY	0x08000000	/* Not soft dirty clean area */
+#define VM_SOFTDIRTY VMA_BIT(VMA_SOFTDIRTY_BIT)
 #else
-# define VM_SOFTDIRTY	0
+#define VM_SOFTDIRTY 0
 #endif
 
-#define VM_MIXEDMAP	0x10000000	/* Can contain "struct page" and pure PFN pages */
-#define VM_HUGEPAGE	0x20000000	/* MADV_HUGEPAGE marked this vma */
-#define VM_NOHUGEPAGE	0x40000000	/* MADV_NOHUGEPAGE marked this vma */
-#define VM_MERGEABLE	BIT(31)		/* KSM may merge identical pages */
-
-#ifdef CONFIG_ARCH_USES_HIGH_VMA_FLAGS
-#define VM_HIGH_ARCH_BIT_0	32	/* bit only usable on 64-bit architectures */
-#define VM_HIGH_ARCH_BIT_1	33	/* bit only usable on 64-bit architectures */
-#define VM_HIGH_ARCH_BIT_2	34	/* bit only usable on 64-bit architectures */
-#define VM_HIGH_ARCH_BIT_3	35	/* bit only usable on 64-bit architectures */
-#define VM_HIGH_ARCH_BIT_4	36	/* bit only usable on 64-bit architectures */
-#define VM_HIGH_ARCH_BIT_5	37	/* bit only usable on 64-bit architectures */
-#define VM_HIGH_ARCH_BIT_6	38	/* bit only usable on 64-bit architectures */
-#define VM_HIGH_ARCH_0	BIT(VM_HIGH_ARCH_BIT_0)
-#define VM_HIGH_ARCH_1	BIT(VM_HIGH_ARCH_BIT_1)
-#define VM_HIGH_ARCH_2	BIT(VM_HIGH_ARCH_BIT_2)
-#define VM_HIGH_ARCH_3	BIT(VM_HIGH_ARCH_BIT_3)
-#define VM_HIGH_ARCH_4	BIT(VM_HIGH_ARCH_BIT_4)
-#define VM_HIGH_ARCH_5	BIT(VM_HIGH_ARCH_BIT_5)
-#define VM_HIGH_ARCH_6	BIT(VM_HIGH_ARCH_BIT_6)
-#endif /* CONFIG_ARCH_USES_HIGH_VMA_FLAGS */
+#define VM_MIXEDMAP VMA_BIT(VMA_MIXEDMAP_BIT)
+#define VM_HUGEPAGE VMA_BIT(VMA_HUGEPAGE_BIT)
+#define VM_NOHUGEPAGE VMA_BIT(VMA_NOHUGEPAGE_BIT)
+#define VM_MERGEABLE VMA_BIT(VMA_MERGEABLE_BIT)
 
 #ifdef CONFIG_ARCH_HAS_PKEYS
-# define VM_PKEY_SHIFT	VM_HIGH_ARCH_BIT_0
-# define VM_PKEY_BIT0	VM_HIGH_ARCH_0
-# define VM_PKEY_BIT1	VM_HIGH_ARCH_1
-# define VM_PKEY_BIT2	VM_HIGH_ARCH_2
+#define VMA_PKEY_BIT0_BIT VMA_HIGH_ARCH_0_BIT
+#define VMA_PKEY_BIT1_BIT VMA_HIGH_ARCH_1_BIT
+#define VMA_PKEY_BIT2_BIT VMA_HIGH_ARCH_2_BIT
+
+#define VM_PKEY_SHIFT ((__force int)VMA_HIGH_ARCH_0_BIT)
+
+#define VM_PKEY_BIT0 VMA_BIT(VMA_PKEY_BIT0_BIT)
+#define VM_PKEY_BIT1 VMA_BIT(VMA_PKEY_BIT1_BIT)
+#define VM_PKEY_BIT2 VMA_BIT(VMA_PKEY_BIT2_BIT)
 #if CONFIG_ARCH_PKEY_BITS > 3
-# define VM_PKEY_BIT3	VM_HIGH_ARCH_3
+#define VMA_PKEY_BIT3_BIT VMA_HIGH_ARCH_3_BIT
+#define VM_PKEY_BIT3 VMA_BIT(VMA_PKEY_BIT3_BIT)
 #else
-# define VM_PKEY_BIT3	0
+#define VM_PKEY_BIT3 0
 #endif
 #if CONFIG_ARCH_PKEY_BITS > 4
-# define VM_PKEY_BIT4	VM_HIGH_ARCH_4
+#define VMA_PKEY_BIT4_BIT VMA_HIGH_ARCH_4_BIT
+#define VM_PKEY_BIT4 VMA_BIT(VMA_PKEY_BIT4_BIT)
 #else
-# define VM_PKEY_BIT4	0
+#define VM_PKEY_BIT4 0
 #endif
 #endif /* CONFIG_ARCH_HAS_PKEYS */
@@ -372,53 +450,63 @@ extern unsigned int kobjsize(const void *objp);
  * (x86). See the comments near alloc_shstk() in arch/x86/kernel/shstk.c
  * for more details on the guard size.
  */
-# define VM_SHADOW_STACK	VM_HIGH_ARCH_5
+#define VMA_SHADOW_STACK_BIT VMA_HIGH_ARCH_5_BIT
+#define VM_SHADOW_STACK VMA_BIT(VMA_SHADOW_STACK_BIT)
 #endif
 
-#if defined(CONFIG_ARM64_GCS)
+#ifdef CONFIG_ARM64_GCS
 /*
  * arm64's Guarded Control Stack implements similar functionality and
  * has similar constraints to shadow stacks.
  */
-# define VM_SHADOW_STACK	VM_HIGH_ARCH_6
+#define VMA_SHADOW_STACK_BIT VMA_HIGH_ARCH_6_BIT
+#define VM_SHADOW_STACK VMA_BIT(VMA_SHADOW_STACK_BIT)
 #endif
 
 #ifndef VM_SHADOW_STACK
-# define VM_SHADOW_STACK	VM_NONE
+#define VM_SHADOW_STACK VM_NONE
 #endif
 
 #if defined(CONFIG_PPC64)
-# define VM_SAO		VM_ARCH_1	/* Strong Access Ordering (powerpc) */
+#define VMA_SAO_BIT VMA_ARCH_1_BIT /* Strong Access Ordering (powerpc) */
+#define VM_SAO VMA_BIT(VMA_SAO_BIT)
 #elif defined(CONFIG_PARISC)
-# define VM_GROWSUP	VM_ARCH_1
+#define VMA_GROWSUP_BIT VMA_ARCH_1_BIT
+#define VM_GROWSUP VMA_BIT(VMA_GROWSUP_BIT)
 #elif defined(CONFIG_SPARC64)
-# define VM_SPARC_ADI	VM_ARCH_1	/* Uses ADI tag for access control */
-# define VM_ARCH_CLEAR	VM_SPARC_ADI
+#define VMA_SPARC_ADI_BIT VMA_ARCH_1_BIT /* Uses ADI tag for access control */
+#define VMA_ARCH_CLEAR_BIT VMA_ARCH_1_BIT
+#define VM_SPARC_ADI VMA_BIT(VMA_SPARC_ADI_BIT)
+#define VM_ARCH_CLEAR VMA_BIT(VMA_ARCH_CLEAR_BIT)
 #elif defined(CONFIG_ARM64)
-# define VM_ARM64_BTI	VM_ARCH_1	/* BTI guarded page, a.k.a. GP bit */
-# define VM_ARCH_CLEAR	VM_ARM64_BTI
+#define VMA_ARM64_BTI_BIT VMA_ARCH_1_BIT /* BTI guarded page, a.k.a. GP bit */
+#define VMA_ARCH_CLEAR_BIT VMA_ARCH_1_BIT
+#define VM_ARM64_BTI VMA_BIT(VMA_ARM64_BTI_BIT)
+#define VM_ARCH_CLEAR VMA_BIT(VMA_ARCH_CLEAR_BIT)
 #elif !defined(CONFIG_MMU)
-# define VM_MAPPED_COPY	VM_ARCH_1	/* T if mapped copy of data (nommu mmap) */
+#define VMA_MAPPED_COPY_BIT VMA_ARCH_1_BIT /* T if mapped copy of data (nommu mmap) */
+#define VM_MAPPED_COPY VMA_BIT(VMA_MAPPED_COPY_BIT)
 #endif
 
 #if defined(CONFIG_ARM64_MTE)
-# define VM_MTE		VM_HIGH_ARCH_4	/* Use Tagged memory for access control */
-# define VM_MTE_ALLOWED	VM_HIGH_ARCH_5	/* Tagged memory permitted */
+#define VMA_MTE_BIT VMA_HIGH_ARCH_4_BIT /* Use Tagged memory for access control */
+#define VMA_MTE_ALLOWED_BIT VMA_HIGH_ARCH_5_BIT /* Tagged memory permitted */
+#define VM_MTE VMA_BIT(VMA_MTE_BIT)
+#define VM_MTE_ALLOWED VMA_BIT(VMA_MTE_ALLOWED_BIT)
 #else
-# define VM_MTE		VM_NONE
-# define VM_MTE_ALLOWED	VM_NONE
+#define VM_MTE VM_NONE
+#define VM_MTE_ALLOWED VM_NONE
 #endif
 
 #ifndef VM_GROWSUP
-# define VM_GROWSUP	VM_NONE
+#define VM_GROWSUP VM_NONE
 #endif
 
 #ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR
-# define VM_UFFD_MINOR_BIT	41
-# define VM_UFFD_MINOR		BIT(VM_UFFD_MINOR_BIT)	/* UFFD minor faults */
-#else /* !CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */
-# define VM_UFFD_MINOR		VM_NONE
-#endif /* CONFIG_HAVE_ARCH_USERFAULTFD_MINOR */
+#define VM_UFFD_MINOR VMA_BIT(VMA_UFFD_MINOR_BIT) /* UFFD minor faults */
+#else
+#define VM_UFFD_MINOR VM_NONE
+#endif
 
 /*
  * This flag is used to connect VFIO to arch specific KVM code. It
@@ -428,24 +516,22 @@ extern unsigned int kobjsize(const void *objp);
  * if KVM does not lock down the memory type.
  */
 #ifdef CONFIG_64BIT
-#define VM_ALLOW_ANY_UNCACHED_BIT	39
-#define VM_ALLOW_ANY_UNCACHED		BIT(VM_ALLOW_ANY_UNCACHED_BIT)
+#define VM_ALLOW_ANY_UNCACHED VMA_BIT(VMA_ALLOW_ANY_UNCACHED_BIT)
 #else
-#define VM_ALLOW_ANY_UNCACHED		VM_NONE
+#define VM_ALLOW_ANY_UNCACHED VM_NONE
 #endif
 
 #ifdef CONFIG_64BIT
-#define VM_DROPPABLE_BIT	40
-#define VM_DROPPABLE		BIT(VM_DROPPABLE_BIT)
+#define VM_DROPPABLE VMA_BIT(VMA_DROPPABLE_BIT)
 #elif defined(CONFIG_PPC32)
-#define VM_DROPPABLE		VM_ARCH_1
+#define VMA_DROPPABLE_BIT VMA_ARCH_1_BIT
+#define VM_DROPPABLE VMA_BIT(VMA_DROPPABLE_BIT)
 #else
 #define VM_DROPPABLE		VM_NONE
 #endif
 
 #ifdef CONFIG_64BIT
-#define VM_SEALED_BIT	42
-#define VM_SEALED	BIT(VM_SEALED_BIT)
+#define VM_SEALED VMA_BIT(VMA_SEALED_BIT)
 #else
 #define VM_SEALED	VM_NONE
 #endif
@@ -474,10 +560,13 @@ extern unsigned int kobjsize(const void *objp);
 #define VM_STARTGAP_FLAGS (VM_GROWSDOWN | VM_SHADOW_STACK)
 
 #ifdef CONFIG_STACK_GROWSUP
-#define VM_STACK	VM_GROWSUP
-#define VM_STACK_EARLY	VM_GROWSDOWN
+#define VMA_STACK_BIT VMA_GROWSUP_BIT
+#define VMA_STACK_EARLY_BIT VMA_GROWSDOWN_BIT
+#define VM_STACK VMA_BIT(VMA_STACK_BIT)
+#define VM_STACK_EARLY VMA_BIT(VMA_STACK_EARLY_BIT)
 #else
-#define VM_STACK	VM_GROWSDOWN
+#define VMA_STACK_BIT VMA_GROWSDOWN_BIT
+#define VM_STACK VMA_BIT(VMA_STACK_BIT)
 #define VM_STACK_EARLY	0
 #endif
@@ -486,7 +575,6 @@ extern unsigned int kobjsize(const void *objp);
 
 /* VMA basic access permission flags */
 #define VM_ACCESS_FLAGS (VM_READ | VM_WRITE | VM_EXEC)
-
 /*
  * Special vmas that are non-mergable, non-mlock()able.
  */
@@ -518,7 +606,7 @@ extern unsigned int kobjsize(const void *objp);
 
 /* Arch-specific flags to clear when updating VM flags on protection change */
 #ifndef VM_ARCH_CLEAR
-# define VM_ARCH_CLEAR	VM_NONE
+#define VM_ARCH_CLEAR VM_NONE
 #endif
 
 #define VM_FLAGS_CLEAR	(ARCH_VM_PKEY_FLAGS | VM_ARCH_CLEAR)
diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h
index 3d9cb3a9411a..7868c419191b 100644
--- a/tools/testing/vma/vma_internal.h
+++ b/tools/testing/vma/vma_internal.h
@@ -46,43 +46,315 @@ extern unsigned long dac_mmap_min_addr;
 
 #define MMF_HAS_MDWE	28
 
+/**
+ * vma_flag_t - specifies an individual VMA flag by bit number.
+ *
+ * This value is made type safe by sparse to avoid passing invalid flag values
+ * around.
+ */
+typedef int __bitwise vma_flag_t;
+
+enum {
+	/* currently active flags */
+	VMA_READ_BIT = (__force vma_flag_t)0,
+	VMA_WRITE_BIT = (__force vma_flag_t)1,
+	VMA_EXEC_BIT = (__force vma_flag_t)2,
+	VMA_SHARED_BIT = (__force vma_flag_t)3,
+
+	/* mprotect() hardcodes VM_MAYREAD >> 4 == VM_READ, and so for r/w/x bits. */
+	VMA_MAYREAD_BIT = (__force vma_flag_t)4,	/* limits for mprotect() etc */
+	VMA_MAYWRITE_BIT = (__force vma_flag_t)5,
+	VMA_MAYEXEC_BIT = (__force vma_flag_t)6,
+	VMA_MAYSHARE_BIT = (__force vma_flag_t)7,
+
+	VMA_GROWSDOWN_BIT = (__force vma_flag_t)8,	/* general info on the segment */
+#ifdef CONFIG_MMU
+	VMA_UFFD_MISSING_BIT = (__force vma_flag_t)9,	/* missing pages tracking */
+#else
+	/* nommu: R/O MAP_PRIVATE mapping that might overlay a file mapping */
+	VMA_MAYOVERLAY_BIT = (__force vma_flag_t)9,
+#endif
+	/* Page-ranges managed without "struct page", just pure PFN */
+	VMA_PFNMAP_BIT = (__force vma_flag_t)10,
+
+	VMA_MAYBE_GUARD_BIT = (__force vma_flag_t)11,
+
+	VMA_UFFD_WP_BIT = (__force vma_flag_t)12,	/* wrprotect pages tracking */
+
+	VMA_LOCKED_BIT = (__force vma_flag_t)13,
+	VMA_IO_BIT = (__force vma_flag_t)14,		/* Memory mapped I/O or similar */
+
+	/* Used by madvise() */
+	VMA_SEQ_READ_BIT = (__force vma_flag_t)15,	/* App will access data sequentially */
+	VMA_RAND_READ_BIT = (__force vma_flag_t)16,	/* App will not benefit from clustered reads */
+
+	VMA_DONTCOPY_BIT = (__force vma_flag_t)17,	/* Do not copy this vma on fork */
+	VMA_DONTEXPAND_BIT = (__force vma_flag_t)18,	/* Cannot expand with mremap() */
+	VMA_LOCKONFAULT_BIT = (__force vma_flag_t)19,	/* Lock pages covered when faulted in */
+	VMA_ACCOUNT_BIT = (__force vma_flag_t)20,	/* Is a VM accounted object */
+	VMA_NORESERVE_BIT = (__force vma_flag_t)21,	/* should the VM suppress accounting */
+	VMA_HUGETLB_BIT = (__force vma_flag_t)22,	/* Huge TLB Page VM */
+	VMA_SYNC_BIT = (__force vma_flag_t)23,		/* Synchronous page faults */
+	VMA_ARCH_1_BIT = (__force vma_flag_t)24,	/* Architecture-specific flag */
+	VMA_WIPEONFORK_BIT = (__force vma_flag_t)25,	/* Wipe VMA contents in child. */
+	VMA_DONTDUMP_BIT = (__force vma_flag_t)26,	/* Do not include in the core dump */
+
+#ifdef CONFIG_MEM_SOFT_DIRTY
+	VMA_SOFTDIRTY_BIT = (__force vma_flag_t)27,	/* Not soft dirty clean area */
+#endif
+
+	VMA_MIXEDMAP_BIT = (__force vma_flag_t)28,	/* Can contain struct page and pure PFN pages */
+	VMA_HUGEPAGE_BIT = (__force vma_flag_t)29,	/* MADV_HUGEPAGE marked this vma */
+	VMA_NOHUGEPAGE_BIT = (__force vma_flag_t)30,	/* MADV_NOHUGEPAGE marked this vma */
+	VMA_MERGEABLE_BIT = (__force vma_flag_t)31,	/* KSM may merge identical pages */
+
+#ifdef CONFIG_64BIT
+	/* These bits are reused, we define specific uses below. */
+#ifdef CONFIG_ARCH_USES_HIGH_VMA_FLAGS
+	VMA_HIGH_ARCH_0_BIT = (__force vma_flag_t)32,
+	VMA_HIGH_ARCH_1_BIT = (__force vma_flag_t)33,
+	VMA_HIGH_ARCH_2_BIT = (__force vma_flag_t)34,
+	VMA_HIGH_ARCH_3_BIT = (__force vma_flag_t)35,
+	VMA_HIGH_ARCH_4_BIT = (__force vma_flag_t)36,
+	VMA_HIGH_ARCH_5_BIT = (__force vma_flag_t)37,
+	VMA_HIGH_ARCH_6_BIT = (__force vma_flag_t)38,
+#endif
+
+	VMA_ALLOW_ANY_UNCACHED_BIT = (__force vma_flag_t)39,
+	VMA_DROPPABLE_BIT = (__force vma_flag_t)40,
+
+#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR
+	VMA_UFFD_MINOR_BIT = (__force vma_flag_t)41,
+#endif
+
+	VMA_SEALED_BIT = (__force vma_flag_t)42,
+#endif /* CONFIG_64BIT */
+};
+
+#define VMA_BIT(bit) BIT((__force int)bit)
+
+/*
+ * vm_flags in vm_area_struct, see mm_types.h.
+ * When changing, update also include/trace/events/mmflags.h
+ */
 #define VM_NONE		0x00000000
-#define VM_READ		0x00000001
-#define VM_WRITE	0x00000002
-#define VM_EXEC		0x00000004
-#define VM_SHARED	0x00000008
-#define VM_MAYREAD	0x00000010
-#define VM_MAYWRITE	0x00000020
-#define VM_MAYEXEC	0x00000040
-#define VM_GROWSDOWN	0x00000100
-#define VM_PFNMAP	0x00000400
-#define VM_MAYBE_GUARD	0x00000800
-#define VM_LOCKED	0x00002000
-#define VM_IO		0x00004000
-#define VM_SEQ_READ	0x00008000	/* App will access data sequentially */
-#define VM_RAND_READ	0x00010000	/* App will not benefit from clustered reads */
-#define VM_DONTEXPAND	0x00040000
-#define VM_LOCKONFAULT	0x00080000
-#define VM_ACCOUNT	0x00100000
-#define VM_NORESERVE	0x00200000
-#define VM_MIXEDMAP	0x10000000
-#define VM_STACK	VM_GROWSDOWN
-#define VM_SHADOW_STACK	VM_NONE
+
+#define VM_READ VMA_BIT(VMA_READ_BIT)
+#define VM_WRITE VMA_BIT(VMA_WRITE_BIT)
+#define VM_EXEC VMA_BIT(VMA_EXEC_BIT)
+#define VM_SHARED VMA_BIT(VMA_SHARED_BIT)
+
+#define VM_MAYREAD VMA_BIT(VMA_MAYREAD_BIT)
+#define VM_MAYWRITE VMA_BIT(VMA_MAYWRITE_BIT)
+#define VM_MAYEXEC VMA_BIT(VMA_MAYEXEC_BIT)
+#define VM_MAYSHARE VMA_BIT(VMA_MAYSHARE_BIT)
+
+#define VM_GROWSDOWN VMA_BIT(VMA_GROWSDOWN_BIT)
+
+#ifdef CONFIG_MMU
+#define VM_UFFD_MISSING VMA_BIT(VMA_UFFD_MISSING_BIT)
+#else /* CONFIG_MMU */
+#define VM_UFFD_MISSING 0
+#endif
+
+#define VM_PFNMAP VMA_BIT(VMA_PFNMAP_BIT)
+
+#define VM_MAYBE_GUARD VMA_BIT(VMA_MAYBE_GUARD_BIT)
+
+#define VM_UFFD_WP VMA_BIT(VMA_UFFD_WP_BIT)
+
+#define VM_LOCKED VMA_BIT(VMA_LOCKED_BIT)
+#define VM_IO VMA_BIT(VMA_IO_BIT)
+
+#define VM_SEQ_READ VMA_BIT(VMA_SEQ_READ_BIT)
+#define VM_RAND_READ VMA_BIT(VMA_RAND_READ_BIT)
+
+#define VM_DONTCOPY VMA_BIT(VMA_DONTCOPY_BIT)
+#define VM_DONTEXPAND VMA_BIT(VMA_DONTEXPAND_BIT)
+#define VM_LOCKONFAULT VMA_BIT(VMA_LOCKONFAULT_BIT)
+#define VM_ACCOUNT VMA_BIT(VMA_ACCOUNT_BIT)
+#define VM_NORESERVE VMA_BIT(VMA_NORESERVE_BIT)
+#define VM_HUGETLB VMA_BIT(VMA_HUGETLB_BIT)
+#define VM_SYNC VMA_BIT(VMA_SYNC_BIT)
+#define VM_ARCH_1 VMA_BIT(VMA_ARCH_1_BIT)
+#define VM_WIPEONFORK VMA_BIT(VMA_WIPEONFORK_BIT)
+#define VM_DONTDUMP VMA_BIT(VMA_DONTDUMP_BIT)
+
+#ifdef CONFIG_MEM_SOFT_DIRTY
+#define VM_SOFTDIRTY VMA_BIT(VMA_SOFTDIRTY_BIT)
+#else
 #define VM_SOFTDIRTY 0
-#define VM_ARCH_1	0x01000000	/* Architecture-specific flag */
+#endif
+
+#define VM_MIXEDMAP VMA_BIT(VMA_MIXEDMAP_BIT)
+#define VM_HUGEPAGE VMA_BIT(VMA_HUGEPAGE_BIT)
+#define VM_NOHUGEPAGE VMA_BIT(VMA_NOHUGEPAGE_BIT)
+#define VM_MERGEABLE VMA_BIT(VMA_MERGEABLE_BIT)
+
+#ifdef CONFIG_ARCH_HAS_PKEYS
+#define VMA_PKEY_BIT0_BIT VMA_HIGH_ARCH_0_BIT
+#define VMA_PKEY_BIT1_BIT VMA_HIGH_ARCH_1_BIT
+#define VMA_PKEY_BIT2_BIT VMA_HIGH_ARCH_2_BIT
+
+#define VM_PKEY_SHIFT ((__force int)VMA_HIGH_ARCH_0_BIT)
+
+#define VM_PKEY_BIT0 VMA_BIT(VMA_PKEY_BIT0_BIT)
+#define VM_PKEY_BIT1 VMA_BIT(VMA_PKEY_BIT1_BIT)
+#define VM_PKEY_BIT2 VMA_BIT(VMA_PKEY_BIT2_BIT)
+#if CONFIG_ARCH_PKEY_BITS > 3
+#define VMA_PKEY_BIT3_BIT VMA_HIGH_ARCH_3_BIT
+#define VM_PKEY_BIT3 VMA_BIT(VMA_PKEY_BIT3_BIT)
+#else
+#define VM_PKEY_BIT3 0
+#endif
+#if CONFIG_ARCH_PKEY_BITS > 4
+#define VMA_PKEY_BIT4_BIT VMA_HIGH_ARCH_4_BIT
+#define VM_PKEY_BIT4 VMA_BIT(VMA_PKEY_BIT4_BIT)
+#else
+#define VM_PKEY_BIT4 0
+#endif
+#endif /* CONFIG_ARCH_HAS_PKEYS */
+
+#ifdef CONFIG_X86_USER_SHADOW_STACK
+/*
+ * VM_SHADOW_STACK should not be set with VM_SHARED because of lack of
+ * support core mm.
+ *
+ * These VMAs will get a single end guard page. This helps userspace protect
+ * itself from attacks. A single page is enough for current shadow stack archs
+ * (x86). See the comments near alloc_shstk() in arch/x86/kernel/shstk.c
+ * for more details on the guard size.
+ */
+#define VMA_SHADOW_STACK_BIT VMA_HIGH_ARCH_5_BIT
+#define VM_SHADOW_STACK VMA_BIT(VMA_SHADOW_STACK_BIT)
+#endif
+
+#ifdef CONFIG_ARM64_GCS
+/*
+ * arm64's Guarded Control Stack implements similar functionality and
+ * has similar constraints to shadow stacks.
+ */
+#define VMA_SHADOW_STACK_BIT VMA_HIGH_ARCH_6_BIT
+#define VM_SHADOW_STACK VMA_BIT(VMA_SHADOW_STACK_BIT)
+#endif
+
+#ifndef VM_SHADOW_STACK
+#define VM_SHADOW_STACK VM_NONE
+#endif
+
+#if defined(CONFIG_PPC64)
+#define VMA_SAO_BIT VMA_ARCH_1_BIT /* Strong Access Ordering (powerpc) */
+#define VM_SAO VMA_BIT(VMA_SAO_BIT)
+#elif defined(CONFIG_PARISC)
+#define VMA_GROWSUP_BIT VMA_ARCH_1_BIT
+#define VM_GROWSUP VMA_BIT(VMA_GROWSUP_BIT)
+#elif defined(CONFIG_SPARC64)
+#define VMA_SPARC_ADI_BIT VMA_ARCH_1_BIT /* Uses ADI tag for access control */
+#define VMA_ARCH_CLEAR_BIT VMA_ARCH_1_BIT
+#define VM_SPARC_ADI VMA_BIT(VMA_SPARC_ADI_BIT)
+#define VM_ARCH_CLEAR VMA_BIT(VMA_ARCH_CLEAR_BIT)
+#elif defined(CONFIG_ARM64)
+#define VMA_ARM64_BTI_BIT VMA_ARCH_1_BIT /* BTI guarded page, a.k.a. GP bit */
+#define VMA_ARCH_CLEAR_BIT VMA_ARCH_1_BIT
+#define VM_ARM64_BTI VMA_BIT(VMA_ARM64_BTI_BIT)
+#define VM_ARCH_CLEAR VMA_BIT(VMA_ARCH_CLEAR_BIT)
+#elif !defined(CONFIG_MMU)
+#define VMA_MAPPED_COPY_BIT VMA_ARCH_1_BIT /* T if mapped copy of data (nommu mmap) */
+#define VM_MAPPED_COPY VMA_BIT(VMA_MAPPED_COPY_BIT)
+#endif
+
+#if defined(CONFIG_ARM64_MTE)
+#define VMA_MTE_BIT VMA_HIGH_ARCH_4_BIT /* Use Tagged memory for access control */
+#define VMA_MTE_ALLOWED_BIT VMA_HIGH_ARCH_5_BIT /* Tagged memory permitted */
+#define VM_MTE VMA_BIT(VMA_MTE_BIT)
+#define VM_MTE_ALLOWED VMA_BIT(VMA_MTE_ALLOWED_BIT)
+#else
+#define VM_MTE VM_NONE
+#define VM_MTE_ALLOWED VM_NONE
+#endif
+
+#ifndef VM_GROWSUP
 #define VM_GROWSUP VM_NONE
+#endif
 
-#define VM_ACCESS_FLAGS (VM_READ | VM_WRITE | VM_EXEC)
-#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP)
+#ifdef CONFIG_HAVE_ARCH_USERFAULTFD_MINOR
+#define VM_UFFD_MINOR VMA_BIT(VMA_UFFD_MINOR_BIT) /* UFFD minor faults */
+#else
+#define VM_UFFD_MINOR VM_NONE
+#endif
+
+/*
+ * This flag is used to connect VFIO to arch specific KVM code. It
+ * indicates that the memory under this VMA is safe for use with any
+ * non-cachable memory type inside KVM. Some VFIO devices, on some
+ * platforms, are thought to be unsafe and can cause machine crashes
+ * if KVM does not lock down the memory type.
+ */
+#ifdef CONFIG_64BIT
+#define VM_ALLOW_ANY_UNCACHED VMA_BIT(VMA_ALLOW_ANY_UNCACHED_BIT)
+#else
+#define VM_ALLOW_ANY_UNCACHED VM_NONE
+#endif
+
+#ifdef CONFIG_64BIT
+#define VM_DROPPABLE VMA_BIT(VMA_DROPPABLE_BIT)
+#elif defined(CONFIG_PPC32)
+#define VMA_DROPPABLE_BIT VMA_ARCH_1_BIT
+#define VM_DROPPABLE VMA_BIT(VMA_DROPPABLE_BIT)
+#else
+#define VM_DROPPABLE VM_NONE
+#endif
+
+#ifdef CONFIG_64BIT
+#define VM_SEALED VMA_BIT(VMA_SEALED_BIT)
+#else
+#define VM_SEALED VM_NONE
+#endif
+
+/* Bits set in the VMA until the stack is in its final location */
+#define VM_STACK_INCOMPLETE_SETUP (VM_RAND_READ | VM_SEQ_READ | VM_STACK_EARLY)
+
+#define TASK_EXEC ((current->personality & READ_IMPLIES_EXEC) ? VM_EXEC : 0)
+
+/* Common data flag combinations */
+#define VM_DATA_FLAGS_TSK_EXEC	(VM_READ | VM_WRITE | TASK_EXEC | \
+				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+#define VM_DATA_FLAGS_NON_EXEC	(VM_READ | VM_WRITE | VM_MAYREAD | \
+				 VM_MAYWRITE | VM_MAYEXEC)
+#define VM_DATA_FLAGS_EXEC	(VM_READ | VM_WRITE | VM_EXEC | \
+				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
+
+#ifndef VM_DATA_DEFAULT_FLAGS		/* arch can override this */
+#define VM_DATA_DEFAULT_FLAGS	VM_DATA_FLAGS_EXEC
+#endif
+
+#ifndef VM_STACK_DEFAULT_FLAGS		/* arch can override this */
+#define VM_STACK_DEFAULT_FLAGS	VM_DATA_DEFAULT_FLAGS
+#endif
+
+#define VM_STARTGAP_FLAGS (VM_GROWSDOWN | VM_SHADOW_STACK)
 
 #ifdef CONFIG_STACK_GROWSUP
-#define VM_STACK	VM_GROWSUP
-#define VM_STACK_EARLY	VM_GROWSDOWN
+#define VMA_STACK_BIT VMA_GROWSUP_BIT
+#define VMA_STACK_EARLY_BIT VMA_GROWSDOWN_BIT
+#define VM_STACK VMA_BIT(VMA_STACK_BIT)
+#define VM_STACK_EARLY VMA_BIT(VMA_STACK_EARLY_BIT)
 #else
-#define VM_STACK	VM_GROWSDOWN
+#define VMA_STACK_BIT VMA_GROWSDOWN_BIT
+#define VM_STACK VMA_BIT(VMA_STACK_BIT)
 #define VM_STACK_EARLY	0
 #endif
 
+#define VM_STACK_FLAGS	(VM_STACK | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT)
+
+/* VMA basic access permission flags */
+#define VM_ACCESS_FLAGS (VM_READ | VM_WRITE | VM_EXEC)
+
+/*
+ * Special vmas that are non-mergable, non-mlock()able.
+ */
+#define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP)
+
 #define DEFAULT_MAP_WINDOW	((1UL << 47) - PAGE_SIZE)
 #define TASK_SIZE_LOW		DEFAULT_MAP_WINDOW
 #define TASK_SIZE_MAX		DEFAULT_MAP_WINDOW
@@ -97,26 +369,11 @@ extern unsigned long dac_mmap_min_addr;
 #define VM_DATA_FLAGS_TSK_EXEC	(VM_READ | VM_WRITE | TASK_EXEC | \
 				 VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC)
-#define VM_DATA_DEFAULT_FLAGS	VM_DATA_FLAGS_TSK_EXEC
-
-#define VM_STARTGAP_FLAGS (VM_GROWSDOWN | VM_SHADOW_STACK)
-
-#define VM_STACK_DEFAULT_FLAGS	VM_DATA_DEFAULT_FLAGS
-#define VM_STACK_FLAGS	(VM_STACK | VM_STACK_DEFAULT_FLAGS | VM_ACCOUNT)
-#define VM_STACK_INCOMPLETE_SETUP (VM_RAND_READ | VM_SEQ_READ | VM_STACK_EARLY)
-
 #define RLIMIT_STACK		3	/* max stack size */
 #define RLIMIT_MEMLOCK		8	/* max locked-in-memory address space */
 
 #define CAP_IPC_LOCK		14
 
-#ifdef CONFIG_64BIT
-#define VM_SEALED_BIT	42
-#define VM_SEALED	BIT(VM_SEALED_BIT)
-#else
-#define VM_SEALED	VM_NONE
-#endif
-
 /* Flags which should result in page tables being copied on fork. */
 #define VM_COPY_ON_FORK	VM_MAYBE_GUARD
-- 
2.51.0
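P.S. Since one stated motivation is allowing tooling to extract flag
metadata from the enum, here is a minimal sketch of mapping bit numbers
back to names, e.g. for test or debug diagnostics (the table and its
name are hypothetical, not part of this patch):

	/* Sketch: bit number -> name table for test/debug diagnostics. */
	static const char *vma_bit_names[] = {
		[(__force int)VMA_READ_BIT]	= "VMA_READ_BIT",
		[(__force int)VMA_WRITE_BIT]	= "VMA_WRITE_BIT",
		[(__force int)VMA_EXEC_BIT]	= "VMA_EXEC_BIT",
		[(__force int)VMA_SHARED_BIT]	= "VMA_SHARED_BIT",
		/* ... and so on for the remaining bits ... */
	};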