The mm_struct list is used to iterate over all the mms and do PTE
Accessed (A) bit scanning. The mm_slot infrastructure is reused to aid
insertion and lookup of mm_structs.

CC: linux-fsdevel@vger.kernel.org
Suggested-by: Bharata B Rao
Signed-off-by: Raghavendra K T
---
 include/linux/kscand.h | 30 +++++++++++++++
 kernel/fork.c          |  2 +
 mm/internal.h          |  1 +
 mm/kscand.c            | 86 ++++++++++++++++++++++++++++++++++++++++++
 mm/mmap.c              |  2 +
 mm/vma_exec.c          |  3 ++
 6 files changed, 124 insertions(+)
 create mode 100644 include/linux/kscand.h

diff --git a/include/linux/kscand.h b/include/linux/kscand.h
new file mode 100644
index 000000000000..ef9947a33ee5
--- /dev/null
+++ b/include/linux/kscand.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_KSCAND_H_
+#define _LINUX_KSCAND_H_
+
+#ifdef CONFIG_KSCAND
+extern void __kscand_enter(struct mm_struct *mm);
+extern void __kscand_exit(struct mm_struct *mm);
+
+static inline void kscand_execve(struct mm_struct *mm)
+{
+	__kscand_enter(mm);
+}
+
+static inline void kscand_fork(struct mm_struct *mm, struct mm_struct *oldmm)
+{
+	__kscand_enter(mm);
+}
+
+static inline void kscand_exit(struct mm_struct *mm)
+{
+	__kscand_exit(mm);
+}
+#else /* !CONFIG_KSCAND */
+static inline void __kscand_enter(struct mm_struct *mm) {}
+static inline void __kscand_exit(struct mm_struct *mm) {}
+static inline void kscand_execve(struct mm_struct *mm) {}
+static inline void kscand_fork(struct mm_struct *mm, struct mm_struct *oldmm) {}
+static inline void kscand_exit(struct mm_struct *mm) {}
+#endif
+#endif /* _LINUX_KSCAND_H_ */
diff --git a/kernel/fork.c b/kernel/fork.c
index 1ee8eb11f38b..a13043de91b0 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -85,6 +85,7 @@
 #include <...>
 #include <...>
 #include <...>
+#include <linux/kscand.h>
 #include <...>
 #include <...>
 #include <...>
@@ -1116,6 +1117,7 @@ static inline void __mmput(struct mm_struct *mm)
 
 	uprobe_clear_state(mm);
 	exit_aio(mm);
+	kscand_exit(mm);
 	ksm_exit(mm);
 	khugepaged_exit(mm); /* must run before exit_mmap */
 	exit_mmap(mm);
diff --git a/mm/internal.h b/mm/internal.h
index 6b8ed2017743..dd86efc54885 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -8,6 +8,7 @@
 #define __MM_INTERNAL_H
 
 #include <...>
+#include <linux/kscand.h>
 #include <...>
 #include <...>
 #include <...>
diff --git a/mm/kscand.c b/mm/kscand.c
index f7bbbc70c86a..d5b0d3041b0f 100644
--- a/mm/kscand.c
+++ b/mm/kscand.c
@@ -7,12 +7,14 @@
 #include <...>
 #include <...>
 #include <...>
+#include <linux/kscand.h>
 #include <...>
 #include <...>
 #include <...>
 
 #include "internal.h"
+#include "mm_slot.h"
 
 static struct task_struct *kscand_thread __read_mostly;
 static DEFINE_MUTEX(kscand_mutex);
@@ -29,11 +31,23 @@ static bool need_wakeup;
 
 static unsigned long kscand_sleep_expire;
 
+static DEFINE_SPINLOCK(kscand_mm_lock);
 static DECLARE_WAIT_QUEUE_HEAD(kscand_wait);
 
+#define KSCAND_SLOT_HASH_BITS 10
+static DEFINE_READ_MOSTLY_HASHTABLE(kscand_slots_hash, KSCAND_SLOT_HASH_BITS);
+
+static struct kmem_cache *kscand_slot_cache __read_mostly;
+
+/* Per mm information collected to control VMA scanning */
+struct kscand_mm_slot {
+	struct mm_slot slot;
+};
+
 /* Data structure to keep track of current mm under scan */
 struct kscand_scan {
 	struct list_head mm_head;
+	struct kscand_mm_slot *mm_slot;
 };
 
 struct kscand_scan kscand_scan = {
@@ -69,6 +83,12 @@ static void kscand_wait_work(void)
 	wait_event_timeout(kscand_wait, kscand_should_wakeup(),
 			scan_sleep_jiffies);
 }
+
+static inline int kscand_test_exit(struct mm_struct *mm)
+{
+	return atomic_read(&mm->mm_users) == 0;
+}
+
 static void kscand_do_scan(void)
 {
 	unsigned long iter = 0, mms_to_scan;
@@ -109,6 +129,65 @@ static int kscand(void *none)
 	return 0;
 }
 
+static inline void kscand_destroy(void)
+{
+	kmem_cache_destroy(kscand_slot_cache);
+}
+
+void __kscand_enter(struct mm_struct *mm)
+{
+	struct kscand_mm_slot *kscand_slot;
+	struct mm_slot *slot;
+	int wakeup;
+
+	/* __kscand_exit() must not run from under us */
+	VM_BUG_ON_MM(kscand_test_exit(mm), mm);
+
+	kscand_slot = mm_slot_alloc(kscand_slot_cache);
+
+	if (!kscand_slot)
+		return;
+
+	slot = &kscand_slot->slot;
+
+	spin_lock(&kscand_mm_lock);
+	mm_slot_insert(kscand_slots_hash, mm, slot);
+
+	wakeup = list_empty(&kscand_scan.mm_head);
+	list_add_tail(&slot->mm_node, &kscand_scan.mm_head);
+	spin_unlock(&kscand_mm_lock);
+
+	mmgrab(mm);
+	if (wakeup)
+		wake_up_interruptible(&kscand_wait);
+}
+
+void __kscand_exit(struct mm_struct *mm)
+{
+	struct kscand_mm_slot *mm_slot;
+	struct mm_slot *slot;
+	int free = 0;
+
+	spin_lock(&kscand_mm_lock);
+	slot = mm_slot_lookup(kscand_slots_hash, mm);
+	mm_slot = mm_slot_entry(slot, struct kscand_mm_slot, slot);
+	if (mm_slot && kscand_scan.mm_slot != mm_slot) {
+		hash_del(&slot->hash);
+		list_del(&slot->mm_node);
+		free = 1;
+	}
+
+	spin_unlock(&kscand_mm_lock);
+
+	if (free) {
+		mm_slot_free(kscand_slot_cache, mm_slot);
+		mmdrop(mm);
+	} else if (mm_slot) {
+		mmap_write_lock(mm);
+		mmap_write_unlock(mm);
+	}
+}
+
 static int start_kscand(void)
 {
 	struct task_struct *kthread;
@@ -149,6 +228,12 @@ static int __init kscand_init(void)
 {
 	int err;
 
+	kscand_slot_cache = KMEM_CACHE(kscand_mm_slot, 0);
+
+	if (!kscand_slot_cache) {
+		pr_err("kscand: kmem_cache error");
+		return -ENOMEM;
+	}
 	err = start_kscand();
 	if (err)
 		goto err_kscand;
@@ -157,6 +242,7 @@ static int __init kscand_init(void)
 
 err_kscand:
 	stop_kscand();
+	kscand_destroy();
 	return err;
 }
 
diff --git a/mm/mmap.c b/mm/mmap.c
index 09c563c95112..c9ffe65866de 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -37,6 +37,7 @@
 #include <...>
 #include <...>
 #include <...>
+#include <linux/kscand.h>
 #include <...>
 #include <...>
 #include <...>
@@ -1849,6 +1850,7 @@ __latent_entropy int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
 
 	if (!retval) {
 		mt_set_in_rcu(vmi.mas.tree);
 		ksm_fork(mm, oldmm);
+		kscand_fork(mm, oldmm);
 		khugepaged_fork(mm, oldmm);
 	} else {
diff --git a/mm/vma_exec.c b/mm/vma_exec.c
index 2dffb02ed6a2..8576b377f7ad 100644
--- a/mm/vma_exec.c
+++ b/mm/vma_exec.c
@@ -128,6 +128,8 @@ int create_init_stack_vma(struct mm_struct *mm, struct vm_area_struct **vmap,
 	if (err)
 		goto err_ksm;
 
+	kscand_execve(mm);
+
 	/*
 	 * Place the stack at the largest stack address the architecture
 	 * supports. Later, we'll move this to an appropriate place. We don't
@@ -151,6 +153,7 @@ int create_init_stack_vma(struct mm_struct *mm, struct vm_area_struct **vmap,
 
 	return 0;
 err:
+	kscand_exit(mm);
 	ksm_exit(mm);
 err_ksm:
 	mmap_write_unlock(mm);
-- 
2.34.1
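
P.S. for readers unfamiliar with mm_slot: the pattern reused above pairs
a hash table with a list. The hash table gives O(1) lookup of the slot
that belongs to a given mm (what mm_slot_lookup() does in
__kscand_exit()), while the list fixes the order in which the scanner
walks the mms (what list_add_tail() does in __kscand_enter()). The
stand-alone user-space sketch below illustrates the pairing; every name
in it (struct mm, slot_enter(), slot_exit(), hash_mm(), NBUCKETS) is
invented for the example and none of it is kernel API.

/*
 * Illustrative user-space sketch of the mm_slot pattern: a hash table
 * keyed by the mm pointer for fast lookup, plus a list that preserves
 * the order in which mms get scanned. Not kernel code.
 */
#include <stdio.h>
#include <stdlib.h>

struct mm { int id; };			/* stand-in for struct mm_struct */

struct slot {
	struct mm *mm;			/* hash key, like mm_slot::mm */
	struct slot *hash_next;		/* bucket chain: lookup by mm */
	struct slot *scan_next;		/* scan list: iteration order */
};

#define NBUCKETS 1024			/* like 1 << KSCAND_SLOT_HASH_BITS */
static struct slot *buckets[NBUCKETS];
static struct slot *scan_head;
static struct slot **scan_tail = &scan_head;

static unsigned int hash_mm(const struct mm *mm)
{
	return ((unsigned long)mm >> 4) % NBUCKETS; /* crude pointer hash */
}

/* Like __kscand_enter(): hash the slot and queue it for scanning. */
static void slot_enter(struct mm *mm)
{
	struct slot *s = calloc(1, sizeof(*s));
	unsigned int b = hash_mm(mm);

	if (!s)
		return;
	s->mm = mm;
	s->hash_next = buckets[b];	/* like mm_slot_insert() */
	buckets[b] = s;
	*scan_tail = s;			/* like list_add_tail() */
	scan_tail = &s->scan_next;
}

/* Like __kscand_exit(): find the slot by mm, unlink it, free it. */
static void slot_exit(struct mm *mm)
{
	struct slot **pp, *s = NULL;

	/* hash lookup plus removal, like mm_slot_lookup()/hash_del() */
	for (pp = &buckets[hash_mm(mm)]; *pp; pp = &(*pp)->hash_next) {
		if ((*pp)->mm == mm) {
			s = *pp;
			*pp = s->hash_next;
			break;
		}
	}
	if (!s)
		return;

	/* unlink from the scan list, like list_del() */
	for (pp = &scan_head; *pp; pp = &(*pp)->scan_next) {
		if (*pp == s) {
			*pp = s->scan_next;
			if (scan_tail == &s->scan_next)
				scan_tail = pp;
			break;
		}
	}
	free(s);
}

int main(void)
{
	struct mm a = { 1 }, b = { 2 };
	struct slot *s;

	slot_enter(&a);
	slot_enter(&b);
	slot_exit(&a);		/* a exits; only b stays on the scan list */

	for (s = scan_head; s; s = s->scan_next)
		printf("scanning mm %d\n", s->mm->id);	/* prints: scanning mm 2 */
	return 0;
}

The kernel's mm/mm_slot.h packages exactly this pairing (an hlist_node
for the hash and a list_head for the walk, embedded in one slot), which
is why khugepaged, KSM, and now kscand can share the same enter/exit
choreography.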