From: Ye Bin This patch prepares for supporting drop_caches on a specific file system. Signed-off-by: Ye Bin --- include/linux/mm.h | 1 + mm/internal.h | 3 +++ mm/shrinker.c | 4 ++-- mm/vmscan.c | 50 ++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 56 insertions(+), 2 deletions(-) diff --git a/include/linux/mm.h b/include/linux/mm.h index 5be3d8a8f806..5bab9472a758 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -4470,6 +4470,7 @@ static inline int in_gate_area(struct mm_struct *mm, unsigned long addr) bool process_shares_mm(const struct task_struct *p, const struct mm_struct *mm); void drop_slab(void); +void drop_sb_dentry_inode(struct super_block *sb); #ifndef CONFIG_MMU #define randomize_va_space 0 diff --git a/mm/internal.h b/mm/internal.h index cb0af847d7d9..4690a58c4820 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -1662,6 +1662,9 @@ void __meminit __init_page_from_nid(unsigned long pfn, int nid); unsigned long shrink_slab(gfp_t gfp_mask, int nid, struct mem_cgroup *memcg, int priority); +unsigned long do_shrink_slab(struct shrink_control *shrinkctl, + struct shrinker *shrinker, int priority); + int shmem_add_to_page_cache(struct folio *folio, struct address_space *mapping, pgoff_t index, void *expected, gfp_t gfp); diff --git a/mm/shrinker.c b/mm/shrinker.c index 4a93fd433689..075e4393da9c 100644 --- a/mm/shrinker.c +++ b/mm/shrinker.c @@ -368,8 +368,8 @@ static long add_nr_deferred(long nr, struct shrinker *shrinker, #define SHRINK_BATCH 128 -static unsigned long do_shrink_slab(struct shrink_control *shrinkctl, - struct shrinker *shrinker, int priority) +unsigned long do_shrink_slab(struct shrink_control *shrinkctl, + struct shrinker *shrinker, int priority) { unsigned long freed = 0; unsigned long long delta; diff --git a/mm/vmscan.c b/mm/vmscan.c index 01d3364fe506..310bed25df78 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -455,6 +455,56 @@ void drop_slab(void) } while ((freed >> shift++) > 1); } +static unsigned long 
drop_shrinker_node(int nid, struct shrinker *shrinker) +{ /* Run shrinker on node nid once for every memcg returned by mem_cgroup_iter() and return the sum of the do_shrink_slab() results. */ + unsigned long freed = 0; + struct mem_cgroup *memcg = NULL; + + memcg = mem_cgroup_iter(NULL, NULL, NULL); + do { + unsigned long ret; + + struct shrink_control sc = { + .gfp_mask = GFP_KERNEL, + .nid = nid, + .memcg = memcg, + }; + /* Skip non-root memcgs that are not online; continue jumps to the loop condition, which still advances the iterator. */ + if (!mem_cgroup_disabled() && + !mem_cgroup_is_root(memcg) && + !mem_cgroup_online(memcg)) + continue; + /* Priority 0 is passed to do_shrink_slab(); a SHRINK_EMPTY result is counted as nothing freed. */ + ret = do_shrink_slab(&sc, shrinker, 0); + if (ret == SHRINK_EMPTY) + ret = 0; + freed += ret; + cond_resched(); + } while ((memcg = mem_cgroup_iter(NULL, memcg, NULL)) != NULL); + + return freed; +} + /* Run sb->s_shrink on every online node, pass after pass; does nothing when sb or sb->s_shrink is NULL. */ +void drop_sb_dentry_inode(struct super_block *sb) +{ + int nid; + int shift = 0; + unsigned long freed; + + if (!sb || !sb->s_shrink) + return; + + do { + freed = 0; + /* Return at once, without finishing the pass, if the current task has a fatal signal pending. */ + for_each_online_node(nid) { + if (fatal_signal_pending(current)) + return; + freed += drop_shrinker_node(nid, sb->s_shrink); + } + } while ((freed >> shift++) > 1); /* loop again only while (freed >> shift) > 1; shift is incremented after every test, so the threshold doubles each pass */ +} + #define CHECK_RECLAIMER_OFFSET(type) \ do { \ BUILD_BUG_ON(PGSTEAL_##type - PGSTEAL_KSWAPD != \ -- 2.34.1 From: Ye Bin In order to better analyze the issue of file system uninstallation caused by kernel module opening files, it is necessary to perform dentry recycling on a single file system. But now, apart from global dentry recycling, it is not supported to do dentry recycling on a single file system separately. This feature has usage scenarios in problem localization scenarios.At the same time, it also provides users with a slightly fine-grained pagecache/entry recycling mechanism. This patch supports the recycling of pagecache/entry for individual file systems. 
Signed-off-by: Ye Bin --- fs/drop_caches.c | 125 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 125 insertions(+) diff --git a/fs/drop_caches.c b/fs/drop_caches.c index 49f56a598ecb..0cd8ad9df07a 100644 --- a/fs/drop_caches.c +++ b/fs/drop_caches.c @@ -11,6 +11,8 @@ #include #include #include +#include +#include #include "internal.h" /* A global variable is a bit ugly, but it keeps the code simple */ @@ -78,6 +80,124 @@ static int drop_caches_sysctl_handler(const struct ctl_table *table, int write, return 0; } +struct drop_fs_caches_work { /* one queued drop request; drop_fs_caches() frees it together with path */ + struct callback_head task_work; + dev_t dev; /* target device; replaced by the device of path when path is set */ + char *path; /* optional path used to find the target super_block; may be NULL */ + unsigned int ctl; /* bit 0: pagecache, bit 1: dentries and inodes, bit 2: suppress the log line */ +}; + /* task_work callback: find the target super_block (from work->path if set, otherwise from work->dev), drop what work->ctl selects, then free work->path and work. */ +static void drop_fs_caches(struct callback_head *twork) +{ + int ret; + struct super_block *sb; + static bool suppress; /* set by a request with BIT(2); later calls then skip the pr_info() below */ + struct drop_fs_caches_work *work = container_of(twork, + struct drop_fs_caches_work, task_work); + unsigned int ctl = work->ctl; + dev_t dev = work->dev; + /* A path takes precedence: resolve it and use the s_dev of the super_block it lives on. */ + if (work->path) { + struct path path; + + ret = kern_path(work->path, LOOKUP_FOLLOW, &path); + if (ret) { + pr_err("%s (%d): %s: failed to get path(%s) %d\n", + current->comm, task_pid_nr(current), + __func__, work->path, ret); + goto out; + } + dev = path.dentry->d_sb->s_dev; + /* Make this file's dentry and inode recyclable */ + path_put(&path); + } + /* Look the super_block up by device number; a NULL result is logged and ends the request. */ + sb = user_get_super(dev, false); + if (!sb) { + pr_err("%s (%d): %s: failed to get dev(%u:%u)'s sb\n", + current->comm, task_pid_nr(current), __func__, + MAJOR(dev), MINOR(dev)); + goto out; + } + /* bit 0: lru_add_drain_all() followed by drop_pagecache_sb() on this super_block */ + if (ctl & BIT(0)) { + lru_add_drain_all(); + drop_pagecache_sb(sb, NULL); + count_vm_event(DROP_PAGECACHE); + } + /* bit 1: call drop_sb_dentry_inode() on this super_block */ + if (ctl & BIT(1)) { + drop_sb_dentry_inode(sb); + count_vm_event(DROP_SLAB); + } + /* NOTE(review): ctl is unsigned int but is printed with %d below. */ + if (!READ_ONCE(suppress)) { + pr_info("%s (%d): %s: %d %u:%u\n", current->comm, + task_pid_nr(current), __func__, ctl, + MAJOR(sb->s_dev), MINOR(sb->s_dev)); + + if (ctl & BIT(2)) + WRITE_ONCE(suppress, true); + } + /* pairs with the successful user_get_super() above */ + drop_super(sb); +out: /* reached on every path, so the work item and its path string are always freed */ + kfree(work->path); + kfree(work); +} + +static int 
drop_fs_caches_sysctl_handler(const struct ctl_table *table, + int write, void *buffer, + size_t *length, loff_t *ppos) +{ /* Write handler for the drop_fs_caches sysctl: parse CTL MAJOR:MINOR or CTL PATH from buffer and queue drop_fs_caches() as task work on current. Returns 0 when write is zero, -EINVAL or -ENOMEM on bad input or failed allocation, otherwise the task_work_add() result. */ + struct drop_fs_caches_work *work = NULL; + unsigned int major, minor; + unsigned int ctl; + int ret; + char *path = NULL; + /* A non-write call is accepted but does nothing. */ + if (!write) + return 0; + /* Try CTL MAJOR:MINOR first, otherwise fall back to CTL PATH; the kstrdup() copy only serves as a destination buffer as large as the input. NOTE(review): %s stops at whitespace, so a path containing spaces is cut short. */ + if (sscanf(buffer, "%u %u:%u", &ctl, &major, &minor) != 3) { + path = kstrdup(buffer, GFP_NOFS); + if (!path) { + ret = -ENOMEM; + goto out; + } + + if (sscanf(buffer, "%u %s", &ctl, path) != 2) { + ret = -EINVAL; + goto out; + } + } + /* Only values 1-7, i.e. some combination of bits 0-2, are accepted. */ + if (ctl < 1 || ctl > 7) { + ret = -EINVAL; + goto out; + } + /* NOTE(review): path above is allocated with GFP_NOFS while work uses GFP_KERNEL. */ + work = kzalloc(sizeof(*work), GFP_KERNEL); + if (!work) { + ret = -ENOMEM; + goto out; + } + /* dev is filled in only for the MAJOR:MINOR form; major and minor are not set when a path was parsed. */ + init_task_work(&work->task_work, drop_fs_caches); + if (!path) + work->dev = MKDEV(major, minor); + work->path = path; + work->ctl = ctl; + ret = task_work_add(current, &work->task_work, TWA_RESUME); +out: /* on failure nothing was queued, so both allocations are freed here; on success drop_fs_caches() frees them */ + if (ret) { + kfree(path); + kfree(work); + } + + return ret; +} + static const struct ctl_table drop_caches_table[] = { { .procname = "drop_caches", @@ -88,6 +208,11 @@ static const struct ctl_table drop_caches_table[] = { .extra1 = SYSCTL_ONE, .extra2 = SYSCTL_FOUR, }, + { + .procname = "drop_fs_caches", + .mode = 0200, /* owner write permission only */ + .proc_handler = drop_fs_caches_sysctl_handler, + }, }; static int __init init_vm_drop_caches_sysctls(void) -- 2.34.1 From: Ye Bin Add instructions for 'drop_fs_caches sysctl' sysctl in 'vm.rst'. 
Signed-off-by: Ye Bin --- Documentation/admin-guide/sysctl/vm.rst | 44 +++++++++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/Documentation/admin-guide/sysctl/vm.rst b/Documentation/admin-guide/sysctl/vm.rst index 97e12359775c..76545da53e20 100644 --- a/Documentation/admin-guide/sysctl/vm.rst +++ b/Documentation/admin-guide/sysctl/vm.rst @@ -37,6 +37,7 @@ Currently, these files are in /proc/sys/vm: - dirtytime_expire_seconds - dirty_writeback_centisecs - drop_caches +- drop_fs_caches - enable_soft_offline - extfrag_threshold - highmem_is_dirtyable @@ -286,6 +287,49 @@ used:: These are informational only. They do not mean that anything is wrong with your system. To disable them, echo 4 (bit 2) into drop_caches. +drop_fs_caches +============== + +Writing to this will cause the kernel to drop clean caches for a specific +file system, as well as reclaimable slab objects like dentries and inodes. Once +dropped, their memory becomes free. Except for specifying the device number +or file path for a specific file system, everything else is consistent with +drop_caches. The device number can be viewed through "cat /proc/self/mountinfo" +or 'lsblk'. 
+ +To free pagecache:: + + echo "1 MAJOR:MINOR" > /proc/sys/vm/drop_fs_caches +Or + echo "1 /mnt/XX" > /proc/sys/vm/drop_fs_caches + +To free reclaimable slab objects (includes dentries and inodes):: + + echo "2 MAJOR:MINOR" > /proc/sys/vm/drop_fs_caches +Or + echo "2 /mnt/XX" > /proc/sys/vm/drop_fs_caches + +To free slab objects and pagecache:: + + echo "3 MAJOR:MINOR" > /proc/sys/vm/drop_fs_caches +Or + echo "3 /mnt/XX" > /proc/sys/vm/drop_fs_caches + +You may see error messages in your kernel log when an incorrect path or device +number is provided:: + + echo (1234): drop_fs_caches: failed to get path(/mnt/XX) ERRNO +Or + echo (1234): drop_fs_caches: failed to get dev(MAJOR:MINOR)'s sb + +You may see informational messages in your kernel log when this file is +used:: + + echo (1234): drop_fs_caches: 3 MAJOR:MINOR + +These are informational only. They do not mean that anything is wrong +with your system. To disable them, echo 4 (bit 2) into drop_fs_caches. + enable_soft_offline =================== Correctable memory errors are very common on servers. Soft-offline is kernel's -- 2.34.1