What a memcg considers to be a valid toptier node is defined by three criteria: (1) The node has CPUs, (2) The node has online memory, and (3) The node is within the cgroup's cpuset.mems. Of the three, the second and third criteria are the only ones that can change dynamically during runtime, via memory hotplug events and cpuset.mems changes, respectively. Introduce functions to calculate and update toptier capacity, and call them during cpuset.mems changes and memory hotplug events. Signed-off-by: Joshua Hahn --- include/linux/memcontrol.h | 6 ++++++ include/linux/memory-tiers.h | 29 +++++++++++++++++++++++++ include/linux/page_counter.h | 2 ++ kernel/cgroup/cpuset.c | 2 +- mm/memcontrol.c | 17 +++++++++++++++ mm/memory-tiers.c | 41 ++++++++++++++++++++++++++++++++++++ mm/page_counter.c | 8 +++++++ 7 files changed, 104 insertions(+), 1 deletion(-) diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 5173a9f16721..900a36112b62 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -608,6 +608,8 @@ static inline void mem_cgroup_protection(struct mem_cgroup *root, void mem_cgroup_calculate_protection(struct mem_cgroup *root, struct mem_cgroup *memcg); +void update_memcg_toptier_capacity(void); + static inline bool mem_cgroup_unprotected(struct mem_cgroup *target, struct mem_cgroup *memcg) { @@ -1116,6 +1118,10 @@ static inline void mem_cgroup_calculate_protection(struct mem_cgroup *root, { } +static inline void update_memcg_toptier_capacity(void) +{ +} + static inline bool mem_cgroup_unprotected(struct mem_cgroup *target, struct mem_cgroup *memcg) { diff --git a/include/linux/memory-tiers.h b/include/linux/memory-tiers.h index 85440473effb..cf616885e0db 100644 --- a/include/linux/memory-tiers.h +++ b/include/linux/memory-tiers.h @@ -53,6 +53,9 @@ int mt_perf_to_adistance(struct access_coordinate *perf, int *adist); struct memory_dev_type *mt_find_alloc_memory_type(int adist, struct list_head *memory_types); void 
mt_put_memory_types(struct list_head *memory_types); +void mt_get_toptier_nodemask(nodemask_t *mask, const nodemask_t *allowed); +unsigned long mt_get_toptier_capacity(const nodemask_t *allowed); +unsigned long mt_get_total_capacity(const nodemask_t *allowed); #ifdef CONFIG_MIGRATION int next_demotion_node(int node, const nodemask_t *allowed_mask); void node_get_allowed_targets(pg_data_t *pgdat, nodemask_t *targets); @@ -152,5 +155,31 @@ static inline struct memory_dev_type *mt_find_alloc_memory_type(int adist, static inline void mt_put_memory_types(struct list_head *memory_types) { } + +static inline void mt_get_toptier_nodemask(nodemask_t *mask, + const nodemask_t *allowed) +{ + *mask = node_states[N_MEMORY]; + if (allowed) + nodes_and(*mask, *mask, *allowed); +} + +static inline unsigned long mt_get_toptier_capacity(const nodemask_t *allowed) +{ + int nid; + unsigned long capacity = 0; + + for_each_node_state(nid, N_MEMORY) { + if (allowed && !node_isset(nid, *allowed)) + continue; + capacity += NODE_DATA(nid)->node_present_pages; + } + return capacity; +} + +static inline unsigned long mt_get_total_capacity(const nodemask_t *allowed) +{ + return mt_get_toptier_capacity(allowed); +} #endif /* CONFIG_NUMA */ #endif /* _LINUX_MEMORY_TIERS_H */ diff --git a/include/linux/page_counter.h b/include/linux/page_counter.h index 128c1272c88c..ada5f1dd75d4 100644 --- a/include/linux/page_counter.h +++ b/include/linux/page_counter.h @@ -121,6 +121,8 @@ static inline void page_counter_reset_watermark(struct page_counter *counter) void page_counter_calculate_protection(struct page_counter *root, struct page_counter *counter, bool recursive_protection); +void page_counter_update_toptier_capacity(struct page_counter *counter, + const nodemask_t *allowed); unsigned long page_counter_toptier_high(struct page_counter *counter); unsigned long page_counter_toptier_low(struct page_counter *counter); #else diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 
7607dfe516e6..e5641dc1af88 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -2620,7 +2620,6 @@ static void update_nodemasks_hier(struct cpuset *cs, nodemask_t *new_mems) rcu_read_lock(); cpuset_for_each_descendant_pre(cp, pos_css, cs) { struct cpuset *parent = parent_cs(cp); - bool has_mems = nodes_and(*new_mems, cp->mems_allowed, parent->effective_mems); /* @@ -2701,6 +2700,7 @@ static int update_nodemask(struct cpuset *cs, struct cpuset *trialcs, /* use trialcs->mems_allowed as a temp variable */ update_nodemasks_hier(cs, &trialcs->mems_allowed); + update_memcg_toptier_capacity(); return 0; } diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 0be1e823d813..f3e4a6ce7181 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -54,6 +54,7 @@ #include #include #include +#include #include #include #include @@ -3906,6 +3907,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) page_counter_init(&memcg->memory, &parent->memory, memcg_on_dfl); page_counter_init(&memcg->swap, &parent->swap, false); + page_counter_update_toptier_capacity(&memcg->memory, NULL); #ifdef CONFIG_MEMCG_V1 memcg->memory.track_failcnt = !memcg_on_dfl; WRITE_ONCE(memcg->oom_kill_disable, READ_ONCE(parent->oom_kill_disable)); @@ -3917,6 +3919,7 @@ mem_cgroup_css_alloc(struct cgroup_subsys_state *parent_css) init_memcg_events(); page_counter_init(&memcg->memory, NULL, true); page_counter_init(&memcg->swap, NULL, false); + page_counter_update_toptier_capacity(&memcg->memory, NULL); #ifdef CONFIG_MEMCG_V1 page_counter_init(&memcg->kmem, NULL, false); page_counter_init(&memcg->tcpmem, NULL, false); @@ -4804,6 +4807,20 @@ void mem_cgroup_calculate_protection(struct mem_cgroup *root, page_counter_calculate_protection(&root->memory, &memcg->memory, recursive_protection); } +void update_memcg_toptier_capacity(void) +{ + struct mem_cgroup *memcg; + nodemask_t allowed; + + for_each_mem_cgroup(memcg) { + if (memcg == root_mem_cgroup) + continue; + + 
cpuset_nodes_allowed(memcg->css.cgroup, &allowed); + page_counter_update_toptier_capacity(&memcg->memory, &allowed); + } +} + static int charge_memcg(struct folio *folio, struct mem_cgroup *memcg, gfp_t gfp) { diff --git a/mm/memory-tiers.c b/mm/memory-tiers.c index a88256381519..259caaf4be8f 100644 --- a/mm/memory-tiers.c +++ b/mm/memory-tiers.c @@ -889,6 +889,7 @@ static int __meminit memtier_hotplug_callback(struct notifier_block *self, mutex_lock(&memory_tier_lock); if (clear_node_memory_tier(nn->nid)) establish_demotion_targets(); + update_memcg_toptier_capacity(); mutex_unlock(&memory_tier_lock); break; case NODE_ADDED_FIRST_MEMORY: @@ -896,6 +897,7 @@ static int __meminit memtier_hotplug_callback(struct notifier_block *self, memtier = set_node_memory_tier(nn->nid); if (!IS_ERR(memtier)) establish_demotion_targets(); + update_memcg_toptier_capacity(); mutex_unlock(&memory_tier_lock); break; } @@ -941,6 +943,45 @@ bool numa_demotion_enabled = false; bool tier_aware_memcg_limits; +void mt_get_toptier_nodemask(nodemask_t *mask, const nodemask_t *allowed) +{ + int nid; + + *mask = NODE_MASK_NONE; + for_each_node_state(nid, N_MEMORY) { + if (node_is_toptier(nid)) + node_set(nid, *mask); + } + if (allowed) + nodes_and(*mask, *mask, *allowed); +} + +unsigned long mt_get_toptier_capacity(const nodemask_t *allowed) +{ + int nid; + unsigned long capacity = 0; + nodemask_t mask; + + mt_get_toptier_nodemask(&mask, allowed); + for_each_node_mask(nid, mask) + capacity += NODE_DATA(nid)->node_present_pages; + + return capacity; +} + +unsigned long mt_get_total_capacity(const nodemask_t *allowed) +{ + int nid; + unsigned long capacity = 0; + + for_each_node_state(nid, N_MEMORY) { + if (allowed && !node_isset(nid, *allowed)) + continue; + capacity += NODE_DATA(nid)->node_present_pages; + } + return capacity; +} + #ifdef CONFIG_MIGRATION #ifdef CONFIG_SYSFS static ssize_t demotion_enabled_show(struct kobject *kobj, diff --git a/mm/page_counter.c b/mm/page_counter.c index 
5ec97811c418..cf21c72bfd4e 100644 --- a/mm/page_counter.c +++ b/mm/page_counter.c @@ -11,6 +11,7 @@ #include #include #include +#include <linux/memory-tiers.h> #include static bool track_protection(struct page_counter *c) @@ -463,6 +464,13 @@ void page_counter_calculate_protection(struct page_counter *root, recursive_protection)); } +void page_counter_update_toptier_capacity(struct page_counter *counter, + const nodemask_t *allowed) +{ + counter->toptier_capacity = mt_get_toptier_capacity(allowed); + counter->total_capacity = mt_get_total_capacity(allowed); +} + unsigned long page_counter_toptier_high(struct page_counter *counter) { unsigned long high = READ_ONCE(counter->high); -- 2.47.3