From: Jiayuan Chen Extract helper functions from debugfs interface for code reuse: - lru_gen_print_lruvec(): Print generations for a single lruvec, extracted from lru_gen_seq_show(). - __run_cmd(): Core command execution logic, extracted from run_cmd(). These helpers will be used by the upcoming memcg interface. No functional change. Signed-off-by: Jiayuan Chen --- mm/vmscan.c | 82 +++++++++++++++++++++++++++++++---------------------- 1 file changed, 48 insertions(+), 34 deletions(-) diff --git a/mm/vmscan.c b/mm/vmscan.c index 619691aa4393..8ea5b67daa36 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -5397,29 +5397,13 @@ static void lru_gen_seq_show_full(struct seq_file *m, struct lruvec *lruvec, seq_putc(m, '\n'); } -/* see Documentation/admin-guide/mm/multigen_lru.rst for details */ -static int lru_gen_seq_show(struct seq_file *m, void *v) +/* Print generations for a single lruvec - helper for debugfs and memcg */ +static void lru_gen_print_lruvec(struct seq_file *m, struct lruvec *lruvec, + unsigned long max_seq, unsigned long *min_seq, + bool full) { unsigned long seq; - bool full = debugfs_get_aux_num(m->file); - struct lruvec *lruvec = v; struct lru_gen_folio *lrugen = &lruvec->lrugen; - int nid = lruvec_pgdat(lruvec)->node_id; - struct mem_cgroup *memcg = lruvec_memcg(lruvec); - DEFINE_MAX_SEQ(lruvec); - DEFINE_MIN_SEQ(lruvec); - - if (nid == first_memory_node) { - const char *path = memcg ? 
m->private : ""; - -#ifdef CONFIG_MEMCG - if (memcg) - cgroup_path(memcg->css.cgroup, m->private, PATH_MAX); -#endif - seq_printf(m, "memcg %5hu %s\n", mem_cgroup_id(memcg), path); - } - - seq_printf(m, " node %5d\n", nid); if (!full) seq = evictable_min_seq(min_seq, MAX_SWAPPINESS / 2); @@ -5431,7 +5415,7 @@ static int lru_gen_seq_show(struct seq_file *m, void *v) for (; seq <= max_seq; seq++) { int type, zone; int gen = lru_gen_from_seq(seq); - unsigned long birth = READ_ONCE(lruvec->lrugen.timestamps[gen]); + unsigned long birth = READ_ONCE(lrugen->timestamps[gen]); seq_printf(m, " %10lu %10u", seq, jiffies_to_msecs(jiffies - birth)); @@ -5450,7 +5434,31 @@ static int lru_gen_seq_show(struct seq_file *m, void *v) if (full) lru_gen_seq_show_full(m, lruvec, max_seq, min_seq, seq); } +} + +/* see Documentation/admin-guide/mm/multigen_lru.rst for details */ +static int lru_gen_seq_show(struct seq_file *m, void *v) +{ + bool full = debugfs_get_aux_num(m->file); + struct lruvec *lruvec = v; + int nid = lruvec_pgdat(lruvec)->node_id; + struct mem_cgroup *memcg = lruvec_memcg(lruvec); + DEFINE_MAX_SEQ(lruvec); + DEFINE_MIN_SEQ(lruvec); + + if (nid == first_memory_node) { + const char *path = memcg ? 
m->private : ""; + +#ifdef CONFIG_MEMCG + if (memcg) + cgroup_path(memcg->css.cgroup, m->private, PATH_MAX); +#endif + seq_printf(m, "memcg %5hu %s\n", mem_cgroup_id(memcg), path); + } + seq_printf(m, " node %5d\n", nid); + + lru_gen_print_lruvec(m, lruvec, max_seq, min_seq, full); return 0; } @@ -5501,6 +5509,24 @@ static int run_eviction(struct lruvec *lruvec, unsigned long seq, struct scan_co return -EINTR; } +/* Core command execution - helper for debugfs and memcg */ +static int __run_cmd(char cmd, struct lruvec *lruvec, unsigned long seq, + struct scan_control *sc, int swappiness, unsigned long opt) +{ + if (swappiness < MIN_SWAPPINESS) + swappiness = get_swappiness(lruvec, sc); + else if (swappiness > SWAPPINESS_ANON_ONLY) + return -EINVAL; + + switch (cmd) { + case '+': + return run_aging(lruvec, seq, swappiness, opt); + case '-': + return run_eviction(lruvec, seq, sc, swappiness, opt); + } + return -EINVAL; +} + static int run_cmd(char cmd, int memcg_id, int nid, unsigned long seq, struct scan_control *sc, int swappiness, unsigned long opt) { @@ -5530,19 +5556,7 @@ static int run_cmd(char cmd, int memcg_id, int nid, unsigned long seq, sc->target_mem_cgroup = memcg; lruvec = get_lruvec(memcg, nid); - if (swappiness < MIN_SWAPPINESS) - swappiness = get_swappiness(lruvec, sc); - else if (swappiness > SWAPPINESS_ANON_ONLY) - goto done; - - switch (cmd) { - case '+': - err = run_aging(lruvec, seq, swappiness, opt); - break; - case '-': - err = run_eviction(lruvec, seq, sc, swappiness, opt); - break; - } + err = __run_cmd(cmd, lruvec, seq, sc, swappiness, opt); done: mem_cgroup_put(memcg); -- 2.43.0 From: Jiayuan Chen Add a memory.lru_gen interface to cgroup v2 that allows users to interact with MGLRU directly on a specific cgroup without needing to know the internal memcg_id. Read (cat memory.lru_gen): Display lru_gen information for this memcg across all NUMA nodes. Write (echo to memory.lru_gen): Execute aging or eviction commands. 
Format: cmd nid seq [swappiness] [opt] cmd: '+' for aging, '-' for eviction nid: node id seq: generation sequence number swappiness: optional, or 'max' for anonymous memory only opt: force_scan for aging, nr_to_reclaim for eviction Example: # Show lru_gen info cat /sys/fs/cgroup/mygroup/memory.lru_gen # Run aging on node 0 with seq 100 echo '+ 0 100' > /sys/fs/cgroup/mygroup/memory.lru_gen # Run eviction on node 0 with seq 99, swappiness 50, reclaim 1000 pages echo '- 0 99 50 1000' > /sys/fs/cgroup/mygroup/memory.lru_gen Test result: cgcreate -g memory:test_group // create 1GB page cache, loop access 200MB as hot pages. cgexec -g memory:test_group ./cache_sim & cat /sys/fs/cgroup/test_group/memory.lru_gen node 0 (min_seq=0/0, max_seq=3) 0 48744 0 4 1 48744 0 0 2 48744 24 262144 3 48744 0 0 node 1 (min_seq=0/0, max_seq=3) 0 48744 0 0 1 48744 0 0 2 48744 1 0 3 48744 0 0 // age echo '+ 0 3' > /sys/fs/cgroup/test_group/memory.lru_gen cat /sys/fs/cgroup/test_group/memory.lru_gen memcg 35 /test_group node 0 (min_seq=1/1, max_seq=4) 1 216564 0 3 2 216564 24 262144 3 216564 0 1 4 14936 0 0 node 1 (min_seq=0/0, max_seq=3) 0 216564 0 0 1 216564 0 0 2 216564 1 0 3 216564 0 0 // age echo '+ 0 4' > /sys/fs/cgroup/test_group/memory.lru_gen cat /sys/fs/cgroup/test_group/memory.lru_gen memcg 35 /test_group node 0 (min_seq=2/2, max_seq=5) 2 266880 23 210947 3 266880 0 1 4 65252 1 51200 5 9320 0 0 node 1 (min_seq=0/0, max_seq=3) 0 266880 0 0 1 266880 0 0 2 266880 1 0 3 266880 0 0 // age echo '+ 0 5' > /sys/fs/cgroup/test_group/memory.lru_gen node 0 (min_seq=3/3, max_seq=6) 3 318384 0 210948 4 116756 0 0 5 60824 1 51200 6 3408 23 0 node 1 (min_seq=0/0, max_seq=3) 0 318384 0 0 1 318384 0 0 2 318384 1 0 3 318384 0 0 // reclaim 200000 pages echo '- 0 3 0 200000' > /sys/fs/cgroup/test_group/memory.lru_gen cat /sys/fs/cgroup/test_group/memory.lru_gen node 0 (min_seq=3/3, max_seq=6) 3 760308 0 10884 4 558680 0 0 5 502748 1 51200 6 445332 23 0 node 1 (min_seq=0/0, max_seq=3) 0 760308 0 0 1 
760308 0 0 2 760308 1 0 3 760308 0 0 // reclaim 20000 pages echo '- 0 3 0 20000' > /sys/fs/cgroup/test_group/memory.lru_gen cat /sys/fs/cgroup/test_group/memory.lru_gen node 0 (min_seq=3/5, max_seq=6) 3 826864 0 0 4 625236 0 0 5 569304 1 51201 6 511888 23 0 node 1 (min_seq=0/0, max_seq=3) 0 826864 0 0 1 826864 0 0 2 826864 1 0 3 826864 0 0 Signed-off-by: Jiayuan Chen --- include/linux/mmzone.h | 16 +++++++ mm/memcontrol.c | 31 ++++++++++++++ mm/vmscan.c | 94 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 141 insertions(+) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index fc5d6c88d2f0..8edb9549b435 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -625,6 +625,11 @@ void lru_gen_offline_memcg(struct mem_cgroup *memcg); void lru_gen_release_memcg(struct mem_cgroup *memcg); void lru_gen_soft_reclaim(struct mem_cgroup *memcg, int nid); +/* memcg interface */ +struct seq_file; +void lru_gen_seq_show_memcg(struct seq_file *m, struct mem_cgroup *memcg); +int lru_gen_seq_write_memcg(struct mem_cgroup *memcg, char *buf); + #else /* !CONFIG_LRU_GEN */ static inline void lru_gen_init_pgdat(struct pglist_data *pgdat) @@ -664,6 +669,17 @@ static inline void lru_gen_soft_reclaim(struct mem_cgroup *memcg, int nid) { } +struct seq_file; +static inline void lru_gen_seq_show_memcg(struct seq_file *m, + struct mem_cgroup *memcg) +{ +} + +static inline int lru_gen_seq_write_memcg(struct mem_cgroup *memcg, char *buf) +{ + return -EOPNOTSUPP; +} + #endif /* CONFIG_LRU_GEN */ struct lruvec { diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 75fc22a33b28..e2f13a69b891 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -4565,6 +4565,30 @@ static ssize_t memory_oom_group_write(struct kernfs_open_file *of, return nbytes; } +#ifdef CONFIG_LRU_GEN +static int memory_lru_gen_show(struct seq_file *m, void *v) +{ + struct mem_cgroup *memcg = mem_cgroup_from_seq(m); + + lru_gen_seq_show_memcg(m, memcg); + return 0; +} + +static ssize_t 
memory_lru_gen_write(struct kernfs_open_file *of, char *buf, + size_t nbytes, loff_t off) +{ + struct mem_cgroup *memcg = mem_cgroup_from_css(of_css(of)); + int ret; + + buf = strstrip(buf); + ret = lru_gen_seq_write_memcg(memcg, buf); + if (ret) + return ret; + + return nbytes; +} +#endif + static ssize_t memory_reclaim(struct kernfs_open_file *of, char *buf, size_t nbytes, loff_t off) { @@ -4649,6 +4673,13 @@ static struct cftype memory_files[] = { .flags = CFTYPE_NS_DELEGATABLE, .write = memory_reclaim, }, +#ifdef CONFIG_LRU_GEN + { + .name = "lru_gen", + .seq_show = memory_lru_gen_show, + .write = memory_lru_gen_write, + }, +#endif { } /* terminate */ }; diff --git a/mm/vmscan.c b/mm/vmscan.c index 8ea5b67daa36..43f38f9f43c5 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -5668,6 +5668,100 @@ static const struct file_operations lru_gen_ro_fops = { .release = seq_release, }; +/****************************************************************************** + * memcg interface + ******************************************************************************/ + +void lru_gen_seq_show_memcg(struct seq_file *m, struct mem_cgroup *memcg) +{ + int nid; + char *path; + + path = kvmalloc(PATH_MAX, GFP_KERNEL); +#ifdef CONFIG_MEMCG + if (memcg && path) + cgroup_path(memcg->css.cgroup, path, PATH_MAX); +#endif + seq_printf(m, "memcg %5hu %s\n", mem_cgroup_id(memcg), + (memcg && path) ? 
path : ""); + kvfree(path); + + for_each_node_state(nid, N_MEMORY) { + struct lruvec *lruvec = get_lruvec(memcg, nid); + DEFINE_MAX_SEQ(lruvec); + DEFINE_MIN_SEQ(lruvec); + + seq_printf(m, " node %5d (min_seq=%lu/%lu, max_seq=%lu)\n", + nid, min_seq[LRU_GEN_ANON], min_seq[LRU_GEN_FILE], max_seq); + lru_gen_print_lruvec(m, lruvec, max_seq, min_seq, false); + } +} + +int lru_gen_seq_write_memcg(struct mem_cgroup *memcg, char *buf) +{ + char *cur, *next; + unsigned int flags; + struct blk_plug plug; + int err = -EINVAL; + struct scan_control sc = { + .may_writepage = true, + .may_unmap = true, + .may_swap = true, + .reclaim_idx = MAX_NR_ZONES - 1, + .gfp_mask = GFP_KERNEL, + .proactive = true, + .target_mem_cgroup = memcg, + }; + + set_task_reclaim_state(current, &sc.reclaim_state); + flags = memalloc_noreclaim_save(); + blk_start_plug(&plug); + if (!set_mm_walk(NULL, true)) { + err = -ENOMEM; + goto done; + } + + next = buf; + while ((cur = strsep(&next, ",;\n"))) { + int n, end; + char cmd, swap_str[5]; + unsigned int nid, swappiness = -1; + unsigned long seq, opt = -1; + struct lruvec *lruvec; + + cur = skip_spaces(cur); + if (!*cur) + continue; + + n = sscanf(cur, "%c %u %lu %n %4s %n %lu %n", &cmd, &nid, + &seq, &end, swap_str, &end, &opt, &end); + if (n < 3 || cur[end]) { + err = -EINVAL; + break; + } + if (n > 3 && strcmp("max", swap_str) == 0) + swappiness = SWAPPINESS_ANON_ONLY; + else if (n > 3 && kstrtouint(swap_str, 0, &swappiness)) + break; + + if (nid >= MAX_NUMNODES || !node_state(nid, N_MEMORY)) { + err = -EINVAL; + break; + } + + lruvec = get_lruvec(memcg, nid); + err = __run_cmd(cmd, lruvec, seq, &sc, swappiness, opt); + if (err) + break; + } +done: + clear_mm_walk(); + blk_finish_plug(&plug); + memalloc_noreclaim_restore(flags); + set_task_reclaim_state(current, NULL); + return err; +} + /****************************************************************************** * initialization 
******************************************************************************/ -- 2.43.0 From: Jiayuan Chen Add documentation for the memory.lru_gen interface in cgroup v2. For detailed information about MGLRU and the command format, refer to the multi-gen LRU documentation. Signed-off-by: Jiayuan Chen --- Documentation/admin-guide/cgroup-v2.rst | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/Documentation/admin-guide/cgroup-v2.rst b/Documentation/admin-guide/cgroup-v2.rst index 7f5b59d95fce..bb3ca7ffd600 100644 --- a/Documentation/admin-guide/cgroup-v2.rst +++ b/Documentation/admin-guide/cgroup-v2.rst @@ -1426,6 +1426,23 @@ PAGE_SIZE multiple when read back. This means that the networking layer will not adapt based on reclaim induced by memory.reclaim. + memory.lru_gen + A read-write file that exists when CONFIG_LRU_GEN is enabled. + + Reading this file displays the multi-gen LRU information for + this memcg, including generation numbers and page counts for + anonymous and file pages across all NUMA nodes. + + Writing to this file allows performing aging or eviction + operations on this memcg. The format is:: + + echo '<cmd> <nid> <seq> [<swappiness> [<opt>]]' > memory.lru_gen + + This interface provides the same functionality as the debugfs + lru_gen interface but operates directly on the cgroup without + requiring the memcg_id. For detailed documentation of the + command format and MGLRU, see Documentation/admin-guide/mm/multigen_lru.rst. + The following nested keys are defined. ========== ================================ -- 2.43.0