From: Qi Zheng

Convert objcg to be a per-memcg per-node type, so that when reparenting
LRU folios later, we can hold the lru lock at the node level, thus
avoiding holding too many lru locks at once.

Signed-off-by: Qi Zheng
Acked-by: Shakeel Butt
---
A simplified stand-alone sketch of the resulting per-node reparent loop is
appended after the patch for illustration.

 include/linux/memcontrol.h | 23 +++++------
 include/linux/sched.h      |  2 +-
 mm/memcontrol.c            | 79 +++++++++++++++++++++++---------------
 3 files changed, 62 insertions(+), 42 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index d2748e672fd88..57d86decf2830 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -116,6 +116,16 @@ struct mem_cgroup_per_node {
 	unsigned long lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS];
 	struct mem_cgroup_reclaim_iter iter;
 
+	/*
+	 * objcg is wiped out as a part of the objcg reparenting process.
+	 * orig_objcg preserves a pointer (and a reference) to the original
+	 * objcg until the end of life of memcg.
+	 */
+	struct obj_cgroup __rcu *objcg;
+	struct obj_cgroup *orig_objcg;
+	/* list of inherited objcgs, protected by objcg_lock */
+	struct list_head objcg_list;
+
 #ifdef CONFIG_MEMCG_NMI_SAFETY_REQUIRES_ATOMIC
 	/* slab stats for nmi context */
 	atomic_t slab_reclaimable;
@@ -180,6 +190,7 @@ struct obj_cgroup {
 		struct list_head list; /* protected by objcg_lock */
 		struct rcu_head rcu;
 	};
+	bool is_root;
 };
 
 /*
@@ -258,15 +269,6 @@ struct mem_cgroup {
 	seqlock_t socket_pressure_seqlock;
 #endif
 	int kmemcg_id;
-	/*
-	 * memcg->objcg is wiped out as a part of the objcg repaprenting
-	 * process. memcg->orig_objcg preserves a pointer (and a reference)
-	 * to the original objcg until the end of live of memcg.
-	 */
-	struct obj_cgroup __rcu *objcg;
-	struct obj_cgroup *orig_objcg;
-	/* list of inherited objcgs, protected by objcg_lock */
-	struct list_head objcg_list;
 
 	struct memcg_vmstats_percpu __percpu *vmstats_percpu;
 
@@ -333,7 +335,6 @@ struct mem_cgroup {
 #define MEMCG_CHARGE_BATCH 64U
 
 extern struct mem_cgroup *root_mem_cgroup;
-extern struct obj_cgroup *root_obj_cgroup;
 
 enum page_memcg_data_flags {
 	/* page->memcg_data is a pointer to an slabobj_ext vector */
@@ -552,7 +553,7 @@ static inline bool mem_cgroup_is_root(struct mem_cgroup *memcg)
 
 static inline bool obj_cgroup_is_root(const struct obj_cgroup *objcg)
 {
-	return objcg == root_obj_cgroup;
+	return objcg->is_root;
 }
 
 static inline bool mem_cgroup_disabled(void)
diff --git a/include/linux/sched.h b/include/linux/sched.h
index a7b4a980eb2f0..7b63b7b74f414 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1533,7 +1533,7 @@ struct task_struct {
 
 	/* Used by memcontrol for targeted memcg charge: */
 	struct mem_cgroup *active_memcg;
 
-	/* Cache for current->cgroups->memcg->objcg lookups: */
+	/* Cache for current->cgroups->memcg->nodeinfo[nid]->objcg lookups: */
 	struct obj_cgroup *objcg;
 #endif
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index b0519a16f5684..e31c58bc89188 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -84,8 +84,6 @@ EXPORT_SYMBOL(memory_cgrp_subsys);
 struct mem_cgroup *root_mem_cgroup __read_mostly;
 EXPORT_SYMBOL(root_mem_cgroup);
 
-struct obj_cgroup *root_obj_cgroup __read_mostly;
-
 /* Active memory cgroup to use from an interrupt context */
 DEFINE_PER_CPU(struct mem_cgroup *, int_active_memcg);
 EXPORT_PER_CPU_SYMBOL_GPL(int_active_memcg);
@@ -210,18 +208,21 @@ static struct obj_cgroup *obj_cgroup_alloc(void)
 }
 
 static inline struct obj_cgroup *__memcg_reparent_objcgs(struct mem_cgroup *memcg,
-						struct mem_cgroup *parent)
+						struct mem_cgroup *parent,
+						int nid)
 {
 	struct obj_cgroup *objcg, *iter;
+	struct mem_cgroup_per_node *pn = memcg->nodeinfo[nid];
+	struct mem_cgroup_per_node *parent_pn = parent->nodeinfo[nid];
 
-	objcg = rcu_replace_pointer(memcg->objcg, NULL, true);
+	objcg = rcu_replace_pointer(pn->objcg, NULL, true);
 	/* 1) Ready to reparent active objcg. */
-	list_add(&objcg->list, &memcg->objcg_list);
+	list_add(&objcg->list, &pn->objcg_list);
 	/* 2) Reparent active objcg and already reparented objcgs to parent. */
-	list_for_each_entry(iter, &memcg->objcg_list, list)
+	list_for_each_entry(iter, &pn->objcg_list, list)
 		WRITE_ONCE(iter->memcg, parent);
 	/* 3) Move already reparented objcgs to the parent's list */
-	list_splice(&memcg->objcg_list, &parent->objcg_list);
+	list_splice(&pn->objcg_list, &parent_pn->objcg_list);
 
 	return objcg;
 }
@@ -268,14 +269,17 @@ static void memcg_reparent_objcgs(struct mem_cgroup *memcg)
 {
 	struct obj_cgroup *objcg;
 	struct mem_cgroup *parent = parent_mem_cgroup(memcg);
+	int nid;
 
-	reparent_locks(memcg, parent);
+	for_each_node(nid) {
+		reparent_locks(memcg, parent);
 
-	objcg = __memcg_reparent_objcgs(memcg, parent);
+		objcg = __memcg_reparent_objcgs(memcg, parent, nid);
 
-	reparent_unlocks(memcg, parent);
+		reparent_unlocks(memcg, parent);
 
-	percpu_ref_kill(&objcg->refcnt);
+		percpu_ref_kill(&objcg->refcnt);
+	}
 }
 
 /*
@@ -2877,8 +2881,10 @@ struct mem_cgroup *mem_cgroup_from_virt(void *p)
 
 static struct obj_cgroup *__get_obj_cgroup_from_memcg(struct mem_cgroup *memcg)
 {
+	int nid = numa_node_id();
+
 	for (; memcg; memcg = parent_mem_cgroup(memcg)) {
-		struct obj_cgroup *objcg = rcu_dereference(memcg->objcg);
+		struct obj_cgroup *objcg = rcu_dereference(memcg->nodeinfo[nid]->objcg);
 
 		if (likely(objcg && obj_cgroup_tryget(objcg)))
 			return objcg;
@@ -2942,6 +2948,7 @@ __always_inline struct obj_cgroup *current_obj_cgroup(void)
 {
 	struct mem_cgroup *memcg;
 	struct obj_cgroup *objcg;
+	int nid = numa_node_id();
 
 	if (IS_ENABLED(CONFIG_MEMCG_NMI_UNSAFE) && in_nmi())
 		return NULL;
@@ -2958,14 +2965,14 @@ __always_inline struct obj_cgroup *current_obj_cgroup(void)
 		 * Objcg reference is kept by the task, so it's safe
 		 * to use the objcg by the current task.
 		 */
-		return objcg ? : root_obj_cgroup;
+		return objcg ? : rcu_dereference_check(root_mem_cgroup->nodeinfo[nid]->objcg, 1);
 	}
 
 	memcg = this_cpu_read(int_active_memcg);
 	if (unlikely(memcg))
 		goto from_memcg;
 
-	return root_obj_cgroup;
+	return rcu_dereference_check(root_mem_cgroup->nodeinfo[nid]->objcg, 1);
 
 from_memcg:
 	for (; memcg; memcg = parent_mem_cgroup(memcg)) {
@@ -2975,12 +2982,12 @@ __always_inline struct obj_cgroup *current_obj_cgroup(void)
 		 * away and can be used within the scope without any additional
 		 * protection.
		 */
-		objcg = rcu_dereference_check(memcg->objcg, 1);
+		objcg = rcu_dereference_check(memcg->nodeinfo[nid]->objcg, 1);
 		if (likely(objcg))
 			return objcg;
 	}
 
-	return root_obj_cgroup;
+	return rcu_dereference_check(root_mem_cgroup->nodeinfo[nid]->objcg, 1);
 }
 
 struct obj_cgroup *get_obj_cgroup_from_folio(struct folio *folio)
@@ -3877,6 +3884,8 @@ static bool alloc_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
 	if (!pn->lruvec_stats_percpu)
 		goto fail;
 
+	INIT_LIST_HEAD(&pn->objcg_list);
+
 	lruvec_init(&pn->lruvec);
 	pn->memcg = memcg;
 
@@ -3891,10 +3900,12 @@ static void __mem_cgroup_free(struct mem_cgroup *memcg)
 {
 	int node;
 
-	obj_cgroup_put(memcg->orig_objcg);
+	for_each_node(node) {
+		struct mem_cgroup_per_node *pn = memcg->nodeinfo[node];
 
-	for_each_node(node)
-		free_mem_cgroup_per_node_info(memcg->nodeinfo[node]);
+		obj_cgroup_put(pn->orig_objcg);
+		free_mem_cgroup_per_node_info(pn);
+	}
 	memcg1_free_events(memcg);
 	kfree(memcg->vmstats);
 	free_percpu(memcg->vmstats_percpu);
@@ -3965,7 +3976,6 @@ static struct mem_cgroup *mem_cgroup_alloc(struct mem_cgroup *parent)
 #endif
 	memcg1_memcg_init(memcg);
 	memcg->kmemcg_id = -1;
-	INIT_LIST_HEAD(&memcg->objcg_list);
 #ifdef CONFIG_CGROUP_WRITEBACK
 	INIT_LIST_HEAD(&memcg->cgwb_list);
 	for (i = 0; i < MEMCG_CGWB_FRN_CNT; i++)
@@ -4042,6 +4052,7 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
 {
 	struct mem_cgroup *memcg = mem_cgroup_from_css(css);
 	struct obj_cgroup *objcg;
+	int nid;
 
 	memcg_online_kmem(memcg);
 
@@ -4053,17 +4064,19 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
 	if (alloc_shrinker_info(memcg))
 		goto offline_kmem;
 
-	objcg = obj_cgroup_alloc();
-	if (!objcg)
-		goto free_shrinker;
+	for_each_node(nid) {
+		objcg = obj_cgroup_alloc();
+		if (!objcg)
+			goto free_objcg;
 
-	if (unlikely(mem_cgroup_is_root(memcg)))
-		root_obj_cgroup = objcg;
+		if (unlikely(mem_cgroup_is_root(memcg)))
+			objcg->is_root = true;
 
-	objcg->memcg = memcg;
-	rcu_assign_pointer(memcg->objcg, objcg);
-	obj_cgroup_get(objcg);
-	memcg->orig_objcg = objcg;
+		objcg->memcg = memcg;
+		rcu_assign_pointer(memcg->nodeinfo[nid]->objcg, objcg);
+		obj_cgroup_get(objcg);
+		memcg->nodeinfo[nid]->orig_objcg = objcg;
+	}
 
 	if (unlikely(mem_cgroup_is_root(memcg)) && !mem_cgroup_disabled())
 		queue_delayed_work(system_dfl_wq, &stats_flush_dwork,
@@ -4087,7 +4100,13 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
 	xa_store(&mem_cgroup_private_ids, memcg->id.id, memcg, GFP_KERNEL);
 	return 0;
 
-free_shrinker:
+free_objcg:
+	for_each_node(nid) {
+		struct mem_cgroup_per_node *pn = memcg->nodeinfo[nid];
+
+		if (pn && pn->orig_objcg)
+			obj_cgroup_put(pn->orig_objcg);
+	}
 	free_shrinker_info(memcg);
 offline_kmem:
 	memcg_offline_kmem(memcg);
-- 
2.20.1
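
For illustration only (not part of the patch): a minimal user-space sketch of
the per-node reparent loop introduced above. All names here (NR_DEMO_NODES,
struct demo_memcg, demo_reparent(), the nr_objcgs counter) are invented
stand-ins for the kernel structures; the point is that splitting objcg state
per node lets reparenting take and release one node-level lock per iteration
instead of holding every node's lru lock across the whole operation.

	/* Hypothetical user-space model of per-node reparenting; not kernel code. */
	#include <stdio.h>
	#include <pthread.h>

	#define NR_DEMO_NODES 2

	struct demo_node_state {
		pthread_mutex_t lock;	/* stands in for the per-node lru lock */
		int nr_objcgs;		/* stands in for the per-node objcg_list */
	};

	struct demo_memcg {
		struct demo_node_state node[NR_DEMO_NODES];
	};

	/*
	 * Move one node's objcgs from child to parent while holding only that
	 * node's locks, mirroring the for_each_node() loop added by the patch.
	 */
	static void demo_reparent(struct demo_memcg *child, struct demo_memcg *parent)
	{
		for (int nid = 0; nid < NR_DEMO_NODES; nid++) {
			pthread_mutex_lock(&parent->node[nid].lock);
			pthread_mutex_lock(&child->node[nid].lock);

			parent->node[nid].nr_objcgs += child->node[nid].nr_objcgs;
			child->node[nid].nr_objcgs = 0;

			pthread_mutex_unlock(&child->node[nid].lock);
			pthread_mutex_unlock(&parent->node[nid].lock);
		}
	}

	int main(void)
	{
		struct demo_memcg parent = {0}, child = {0};

		for (int nid = 0; nid < NR_DEMO_NODES; nid++) {
			pthread_mutex_init(&parent.node[nid].lock, NULL);
			pthread_mutex_init(&child.node[nid].lock, NULL);
			child.node[nid].nr_objcgs = nid + 1;
		}

		demo_reparent(&child, &parent);

		for (int nid = 0; nid < NR_DEMO_NODES; nid++)
			printf("node %d: parent=%d child=%d\n", nid,
			       parent.node[nid].nr_objcgs, child.node[nid].nr_objcgs);
		return 0;
	}

Building with "gcc -pthread demo.c" and running it should show each node's
count moving from child to parent independently, one node lock at a time.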