Introduce a new scalar value (PTEAScanScale) to control per-task PTE A bit scanning. 0 : scanning disabled, 1-10 : scanning enabled. In the future, PTEAScanScale could also be used to control the aggressiveness of scanning. CC: linux-doc@vger.kernel.org CC: Jonathan Corbet CC: linux-fsdevel@vger.kernel.org Suggested-by: David Rientjes Signed-off-by: Raghavendra K T --- Documentation/filesystems/proc.rst | 2 ++ fs/proc/task_mmu.c | 4 ++++ include/linux/mm_types.h | 3 +++ include/uapi/linux/prctl.h | 7 +++++++ kernel/fork.c | 4 ++++ kernel/sys.c | 25 +++++++++++++++++++++++++ mm/kscand.c | 5 +++++ 7 files changed, 50 insertions(+) diff --git a/Documentation/filesystems/proc.rst b/Documentation/filesystems/proc.rst index 5236cb52e357..0e99d1ca229a 100644 --- a/Documentation/filesystems/proc.rst +++ b/Documentation/filesystems/proc.rst @@ -205,6 +205,7 @@ read the file /proc/PID/status:: VmLib: 1412 kB VmPTE: 20 kb VmSwap: 0 kB + PTEAScanScale: 0 HugetlbPages: 0 kB CoreDumping: 0 THP_enabled: 1 @@ -288,6 +289,7 @@ It's slow but very precise. 
VmPTE size of page table entries VmSwap amount of swap used by anonymous private data (shmem swap usage is not included) + PTEAScanScale Integer representing async PTE A bit scan aggression HugetlbPages size of hugetlb memory portions CoreDumping process's memory is currently being dumped (killing the process may lead to a corrupted core) diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c index 751479eb128f..05be24e4bc4f 100644 --- a/fs/proc/task_mmu.c +++ b/fs/proc/task_mmu.c @@ -79,6 +79,10 @@ void task_mem(struct seq_file *m, struct mm_struct *mm) " kB\nVmPTE:\t", mm_pgtables_bytes(mm) >> 10, 8); SEQ_PUT_DEC(" kB\nVmSwap:\t", swap); seq_puts(m, " kB\n"); +#ifdef CONFIG_KSCAND + seq_put_decimal_ull_width(m, "PTEAScanScale:\t", mm->pte_scan_scale, 8); + seq_puts(m, "\n"); +#endif hugetlb_report_usage(m, mm); } #undef SEQ_PUT_DEC diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index e3d8f11a5a04..798e6053eebe 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -1151,6 +1151,9 @@ struct mm_struct { #ifdef CONFIG_KSCAND /* Tracks promotion node. XXX: use nodemask */ int target_node; + + /* Integer representing PTE A bit scan aggression (0-10) */ + unsigned int pte_scan_scale; #endif /* * An operation with batched TLB flushing is going on. 
Anything diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index 43dec6eed559..6b5877865e08 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -371,4 +371,11 @@ struct prctl_mm_map { # define PR_FUTEX_HASH_GET_SLOTS 2 # define PR_FUTEX_HASH_GET_IMMUTABLE 3 +/* Set/get PTE A bit scan scale */ +#define PR_SET_PTE_A_SCAN_SCALE 79 +#define PR_GET_PTE_A_SCAN_SCALE 80 +# define PR_PTE_A_SCAN_SCALE_MIN 0 +# define PR_PTE_A_SCAN_SCALE_MAX 10 +# define PR_PTE_A_SCAN_SCALE_DEFAULT 8 + #endif /* _LINUX_PRCTL_H */ diff --git a/kernel/fork.c b/kernel/fork.c index a13043de91b0..bb780215024c 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -106,6 +106,7 @@ #include #include #include +#include #include #include @@ -1050,6 +1051,9 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p, futex_mm_init(mm); #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && !defined(CONFIG_SPLIT_PMD_PTLOCKS) mm->pmd_huge_pte = NULL; +#endif +#ifdef CONFIG_KSCAND + mm->pte_scan_scale = PR_PTE_A_SCAN_SCALE_DEFAULT; #endif mm_init_uprobes_state(mm); hugetlb_count_init(mm); diff --git a/kernel/sys.c b/kernel/sys.c index adc0de0aa364..f6c893b22bc6 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -2147,6 +2147,19 @@ static int prctl_set_auxv(struct mm_struct *mm, unsigned long addr, return 0; } +#ifdef CONFIG_KSCAND +static int prctl_pte_scan_scale_write(unsigned int scale) +{ + scale = clamp(scale, PR_PTE_A_SCAN_SCALE_MIN, PR_PTE_A_SCAN_SCALE_MAX); + current->mm->pte_scan_scale = scale; + return 0; +} + +static unsigned int prctl_pte_scan_scale_read(void) +{ + return current->mm->pte_scan_scale; +} +#endif static int prctl_set_mm(int opt, unsigned long addr, unsigned long arg4, unsigned long arg5) @@ -2824,6 +2837,18 @@ SYSCALL_DEFINE5(prctl, int, option, unsigned long, arg2, unsigned long, arg3, case PR_FUTEX_HASH: error = futex_hash_prctl(arg2, arg3, arg4); break; +#ifdef CONFIG_KSCAND + case PR_SET_PTE_A_SCAN_SCALE: + if (arg3 || arg4 || 
arg5) + return -EINVAL; + error = prctl_pte_scan_scale_write((unsigned int) arg2); + break; + case PR_GET_PTE_A_SCAN_SCALE: + if (arg2 || arg3 || arg4 || arg5) + return -EINVAL; + error = prctl_pte_scan_scale_read(); + break; +#endif default: trace_task_prctl_unknown(option, arg2, arg3, arg4, arg5); error = -EINVAL; diff --git a/mm/kscand.c b/mm/kscand.c index 273306f47553..8aef6021c6ba 100644 --- a/mm/kscand.c +++ b/mm/kscand.c @@ -1306,6 +1306,11 @@ static unsigned long kscand_scan_mm_slot(void) goto outerloop; } + if (!mm->pte_scan_scale) { + next_mm = true; + goto outerloop; + } + mm_target_node = READ_ONCE(mm->target_node); if (mm_target_node != mm_slot_target_node) WRITE_ONCE(mm->target_node, mm_slot_target_node); -- 2.34.1