Also add a config option for the same.

High level design:

 While (1):
   Scan the slow-tier pages belonging to VMAs of a task.
   Add to migration list.

 A separate thread:
   Migrate scanned pages to a toptier node based on heuristics.

The overall code is influenced by khugepaged design.

Signed-off-by: Raghavendra K T <raghavendra.kt@amd.com>
---
 mm/Kconfig  |   8 +++
 mm/Makefile |   1 +
 mm/kscand.c | 163 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 172 insertions(+)
 create mode 100644 mm/kscand.c

diff --git a/mm/Kconfig b/mm/Kconfig
index 781be3240e21..d1e5be76a96e 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -750,6 +750,14 @@ config KSM
 	  until a program has madvised that an area is MADV_MERGEABLE, and
 	  root has set /sys/kernel/mm/ksm/run to 1 (if CONFIG_SYSFS is set).
 
+config KSCAND
+	bool "Enable PTE A bit scanning and Migration"
+	depends on NUMA_BALANCING
+	help
+	  Enable PTE A bit scanning of pages. The option creates a separate
+	  kthread for scanning and migration. Accessed slow-tier pages are
+	  migrated to a regular NUMA node to reduce hot page access latency.
+ config DEFAULT_MMAP_MIN_ADDR int "Low address space to protect from user allocation" depends on MMU diff --git a/mm/Makefile b/mm/Makefile index 1a7a11d4933d..a16ef2ff3da1 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -97,6 +97,7 @@ obj-$(CONFIG_FAIL_PAGE_ALLOC) += fail_page_alloc.o obj-$(CONFIG_MEMTEST) += memtest.o obj-$(CONFIG_MIGRATION) += migrate.o obj-$(CONFIG_NUMA) += memory-tiers.o +obj-$(CONFIG_KSCAND) += kscand.o obj-$(CONFIG_DEVICE_MIGRATION) += migrate_device.o obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += huge_memory.o khugepaged.o obj-$(CONFIG_PAGE_COUNTER) += page_counter.o diff --git a/mm/kscand.c b/mm/kscand.c new file mode 100644 index 000000000000..f7bbbc70c86a --- /dev/null +++ b/mm/kscand.c @@ -0,0 +1,163 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include "internal.h" + +static struct task_struct *kscand_thread __read_mostly; +static DEFINE_MUTEX(kscand_mutex); + +/* How long to pause between two scan cycles */ +static unsigned int kscand_scan_sleep_ms __read_mostly = 20; + +/* Max number of mms to scan in one scan cycle */ +#define KSCAND_MMS_TO_SCAN (4 * 1024UL) +static unsigned long kscand_mms_to_scan __read_mostly = KSCAND_MMS_TO_SCAN; + +bool kscand_scan_enabled = true; +static bool need_wakeup; + +static unsigned long kscand_sleep_expire; + +static DECLARE_WAIT_QUEUE_HEAD(kscand_wait); + +/* Data structure to keep track of current mm under scan */ +struct kscand_scan { + struct list_head mm_head; +}; + +struct kscand_scan kscand_scan = { + .mm_head = LIST_HEAD_INIT(kscand_scan.mm_head), +}; + +static inline int kscand_has_work(void) +{ + return !list_empty(&kscand_scan.mm_head); +} + +static inline bool kscand_should_wakeup(void) +{ + bool wakeup = kthread_should_stop() || need_wakeup || + time_after_eq(jiffies, kscand_sleep_expire); + + need_wakeup = false; + + return wakeup; +} + +static void kscand_wait_work(void) +{ + 
const unsigned long scan_sleep_jiffies = + msecs_to_jiffies(kscand_scan_sleep_ms); + + if (!scan_sleep_jiffies) + return; + + kscand_sleep_expire = jiffies + scan_sleep_jiffies; + + /* Allows kthread to pause scanning */ + wait_event_timeout(kscand_wait, kscand_should_wakeup(), + scan_sleep_jiffies); +} +static void kscand_do_scan(void) +{ + unsigned long iter = 0, mms_to_scan; + + mms_to_scan = READ_ONCE(kscand_mms_to_scan); + + while (true) { + if (unlikely(kthread_should_stop()) || + !READ_ONCE(kscand_scan_enabled)) + break; + + if (kscand_has_work()) + msleep(100); + + iter++; + + if (iter >= mms_to_scan) + break; + cond_resched(); + } +} + +static int kscand(void *none) +{ + while (true) { + if (unlikely(kthread_should_stop())) + break; + + while (!READ_ONCE(kscand_scan_enabled)) { + cpu_relax(); + kscand_wait_work(); + } + + kscand_do_scan(); + + kscand_wait_work(); + } + return 0; +} + +static int start_kscand(void) +{ + struct task_struct *kthread; + + guard(mutex)(&kscand_mutex); + + if (kscand_thread) + return 0; + + kthread = kthread_run(kscand, NULL, "kscand"); + if (IS_ERR(kscand_thread)) { + pr_err("kscand: kthread_run(kscand) failed\n"); + return PTR_ERR(kthread); + } + + kscand_thread = kthread; + pr_info("kscand: Successfully started kscand"); + + if (!list_empty(&kscand_scan.mm_head)) + wake_up_interruptible(&kscand_wait); + + return 0; +} + +static int stop_kscand(void) +{ + guard(mutex)(&kscand_mutex); + + if (kscand_thread) { + kthread_stop(kscand_thread); + kscand_thread = NULL; + } + + return 0; +} + +static int __init kscand_init(void) +{ + int err; + + err = start_kscand(); + if (err) + goto err_kscand; + + return 0; + +err_kscand: + stop_kscand(); + + return err; +} +subsys_initcall(kscand_init); -- 2.34.1