From: Jiayuan Chen Add a memory.lru_gen interface to cgroup v2 that allows users to interact with MGLRU directly on a specific cgroup without needing to know the internal memcg_id. Read (cat memory.lru_gen): Display lru_gen information for this memcg across all NUMA nodes. Write (echo to memory.lru_gen): Execute aging or eviction commands. Format: cmd nid seq [swappiness] [opt] cmd: '+' for aging, '-' for eviction nid: node id seq: generation sequence number swappiness: optional, or 'max' for anonymous memory only opt: force_scan for aging, nr_to_reclaim for eviction Example: # Show lru_gen info cat /sys/fs/cgroup/mygroup/memory.lru_gen # Run aging on node 0 with seq 100 echo '+ 0 100' > /sys/fs/cgroup/mygroup/memory.lru_gen # Run eviction on node 0 with seq 99, swappiness 50, reclaim 1000 pages echo '- 0 99 50 1000' > /sys/fs/cgroup/mygroup/memory.lru_gen Test result: cgcreate -g memory:test_group // create 1GB page cache, loop access 200MB as hot pages. cgexec -g memory:test_group ./cache_sim & cat /sys/fs/cgroup/test_group/memory.lru_gen node 0 (min_seq=0/0, max_seq=3) 0 48744 0 4 1 48744 0 0 2 48744 24 262144 3 48744 0 0 node 1 (min_seq=0/0, max_seq=3) 0 48744 0 0 1 48744 0 0 2 48744 1 0 3 48744 0 0 // age echo '+ 0 3' > /sys/fs/cgroup/test_group/memory.lru_gen cat /sys/fs/cgroup/test_group/memory.lru_gen memcg 35 /test_group node 0 (min_seq=1/1, max_seq=4) 1 216564 0 3 2 216564 24 262144 3 216564 0 1 4 14936 0 0 node 1 (min_seq=0/0, max_seq=3) 0 216564 0 0 1 216564 0 0 2 216564 1 0 3 216564 0 0 // age echo '+ 0 4' > /sys/fs/cgroup/test_group/memory.lru_gen cat /sys/fs/cgroup/test_group/memory.lru_gen memcg 35 /test_group node 0 (min_seq=2/2, max_seq=5) 2 266880 23 210947 3 266880 0 1 4 65252 1 51200 5 9320 0 0 node 1 (min_seq=0/0, max_seq=3) 0 266880 0 0 1 266880 0 0 2 266880 1 0 3 266880 0 0 // age echo '+ 0 5' > /sys/fs/cgroup/test_group/memory.lru_gen node 0 (min_seq=3/3, max_seq=6) 3 318384 0 210948 4 116756 0 0 5 60824 1 51200 6 3408 23 0 node 1 (min_seq=0/0, max_seq=3) 0 318384 0 0 1 318384 0 0 2 318384 1 0 3 318384 0 0 // reclaim 200000 pages echo '- 0 3 0 200000' > /sys/fs/cgroup/test_group/memory.lru_gen cat /sys/fs/cgroup/test_group/memory.lru_gen node 0 (min_seq=3/3, max_seq=6) 3 760308 0 10884 4 558680 0 0 5 502748 1 51200 6 445332 23 0 node 1 (min_seq=0/0, max_seq=3) 0 760308 0 0 1 760308 0 0 2 760308 1 0 3 760308 0 0 // reclaim 20000 pages echo '- 0 3 0 20000' > /sys/fs/cgroup/test_group/memory.lru_gen cat /sys/fs/cgroup/test_group/memory.lru_gen node 0 (min_seq=3/5, max_seq=6) 3 826864 0 0 4 625236 0 0 5 569304 1 51201 6 511888 23 0 node 1 (min_seq=0/0, max_seq=3) 0 826864 0 0 1 826864 0 0 2 826864 1 0 3 826864 0 0 Signed-off-by: Jiayuan Chen --- include/linux/mmzone.h | 16 +++++++ mm/memcontrol.c | 31 ++++++++++++++ mm/vmscan.c | 94 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 141 insertions(+) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index fc5d6c88d2f0..8edb9549b435 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -625,6 +625,11 @@ void lru_gen_offline_memcg(struct mem_cgroup *memcg); void lru_gen_release_memcg(struct mem_cgroup *memcg); void lru_gen_soft_reclaim(struct mem_cgroup *memcg, int nid); +/* memcg interface */ +struct seq_file; +void lru_gen_seq_show_memcg(struct seq_file *m, struct mem_cgroup *memcg); +int lru_gen_seq_write_memcg(struct mem_cgroup *memcg, char *buf); + #else /* !CONFIG_LRU_GEN */ static inline void lru_gen_init_pgdat(struct pglist_data *pgdat) @@ -664,6 +669,17 @@ static inline void lru_gen_soft_reclaim(struct mem_cgroup *memcg, int nid) { } +struct seq_file; +static inline void lru_gen_seq_show_memcg(struct seq_file *m, + struct mem_cgroup *memcg) +{ +} + +static inline int lru_gen_seq_write_memcg(struct mem_cgroup *memcg, char *buf) +{ + return -EOPNOTSUPP; +} + #endif /* CONFIG_LRU_GEN */ struct lruvec { diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 75fc22a33b28..e2f13a69b891 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4565,6 +4565,30 @@ static ssize_t memory_oom_group_write(struct kernfs_open_file *of, return nbytes; } +#ifdef CONFIG_LRU_GEN +static int memory_lru_gen_show(struct seq_file *m, void *v) +{ + struct mem_cgroup *memcg = mem_cgroup_from_seq(m); + + lru_gen_seq_show_memcg(m, memcg); + return 0; +} + +static ssize_t memory_lru_gen_write(struct kernfs_open_file *of, char *buf, + size_t nbytes, loff_t off) +{ + struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); + int ret; + + buf = strstrip(buf); + ret = lru_gen_seq_write_memcg(memcg, buf); + if (ret) + return ret; + + return nbytes; +} +#endif + static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { @@ -4649,6 +4673,13 @@ static struct cftype memory_files[] = { .flags = CFTYPE_NS_DELEGATABLE, .write = memory_reclaim, }, +#ifdef CONFIG_LRU_GEN + { + .name = "lru_gen", + .seq_show = memory_lru_gen_show, + .write = memory_lru_gen_write, + }, +#endif { } /* terminate */ }; diff --git a/mm/vmscan.c b/mm/vmscan.c index 8ea5b67daa36..43f38f9f43c5 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -5668,6 +5668,100 @@ static const struct file_operations lru_gen_ro_fops = { .release = seq_release, }; +/****************************************************************************** + * memcg interface + ******************************************************************************/ + +void lru_gen_seq_show_memcg(struct seq_file *m, struct mem_cgroup *memcg) +{ + int nid; + char *path; + + path = kvmalloc(PATH_MAX, GFP_KERNEL); +#ifdef CONFIG_MEMCG + if (memcg && path) + cgroup_path(memcg->css.cgroup, path, PATH_MAX); +#endif + seq_printf(m, "memcg %5hu %s\n", mem_cgroup_id(memcg), + (memcg && path) ? path : ""); + kvfree(path); + + for_each_node_state(nid, N_MEMORY) { + struct lruvec *lruvec = get_lruvec(memcg, nid); + DEFINE_MAX_SEQ(lruvec); + DEFINE_MIN_SEQ(lruvec); + + seq_printf(m, " node %5d (min_seq=%lu/%lu, max_seq=%lu)\n", + nid, min_seq[LRU_GEN_ANON], min_seq[LRU_GEN_FILE], max_seq); + lru_gen_print_lruvec(m, lruvec, max_seq, min_seq, false); + } +} + +int lru_gen_seq_write_memcg(struct mem_cgroup *memcg, char *buf) +{ + char *cur, *next; + unsigned int flags; + struct blk_plug plug; + int err = -EINVAL; + struct scan_control sc = { + .may_writepage = true, + .may_unmap = true, + .may_swap = true, + .reclaim_idx = MAX_NR_ZONES - 1, + .gfp_mask = GFP_KERNEL, + .proactive = true, + .target_mem_cgroup = memcg, + }; + + set_task_reclaim_state(current, &sc.reclaim_state); + flags = memalloc_noreclaim_save(); + blk_start_plug(&plug); + if (!set_mm_walk(NULL, true)) { + err = -ENOMEM; + goto done; + } + + next = buf; + while ((cur = strsep(&next, ",;\n"))) { + int n, end; + char cmd, swap_str[5]; + unsigned int nid, swappiness = -1; + unsigned long seq, opt = -1; + struct lruvec *lruvec; + + cur = skip_spaces(cur); + if (!*cur) + continue; + + n = sscanf(cur, "%c %u %lu %n %4s %n %lu %n", &cmd, &nid, + &seq, &end, swap_str, &end, &opt, &end); + if (n < 3 || cur[end]) { + err = -EINVAL; + break; + } + if (n > 3 && strcmp("max", swap_str) == 0) + swappiness = SWAPPINESS_ANON_ONLY; + else if (n > 3 && kstrtouint(swap_str, 0, &swappiness)) + break; + + if (nid >= MAX_NUMNODES || !node_state(nid, N_MEMORY)) { + err = -EINVAL; + break; + } + + lruvec = get_lruvec(memcg, nid); + err = __run_cmd(cmd, lruvec, seq, &sc, swappiness, opt); + if (err) + break; + } +done: + clear_mm_walk(); + blk_finish_plug(&plug); + memalloc_noreclaim_restore(flags); + set_task_reclaim_state(current, NULL); + return err; +} + /****************************************************************************** * initialization ******************************************************************************/ -- 2.43.0