Get rid of MEMCG_KMEM and wire up all the "generic" kmem accounting paths
by introducing per-node obj_cgroup objects: kernel memory is now accounted
through a per-node NR_KMEM node_stat_item instead of the memcg-wide
MEMCG_KMEM counter. Note that kmem users are not yet converted to proper
per-memcg-per-node accounting; that is done in upcoming patches.

Signed-off-by: Alexandre Ghiti
---
 include/linux/memcontrol.h | 23 ++++++++++----
 include/linux/mmzone.h     |  1 +
 mm/memcontrol.c            | 64 ++++++++++++++++++++++++--------------
 mm/vmstat.c                |  1 +
 4 files changed, 59 insertions(+), 30 deletions(-)

diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 568ab08f42af..17cf823160e4 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -35,7 +35,6 @@ enum memcg_stat_item {
 	MEMCG_SWAP = NR_VM_NODE_STAT_ITEMS,
 	MEMCG_SOCK,
 	MEMCG_PERCPU_B,
-	MEMCG_KMEM,
 	MEMCG_ZSWAP_B,
 	MEMCG_ZSWAPPED,
 	MEMCG_ZSWAP_INCOMP,
@@ -126,9 +125,10 @@ struct mem_cgroup_per_node {
 	struct list_head objcg_list;
 
 #ifdef CONFIG_MEMCG_NMI_SAFETY_REQUIRES_ATOMIC
-	/* slab stats for nmi context */
+	/* slab and kmem stats for nmi context */
 	atomic_t slab_reclaimable;
 	atomic_t slab_unreclaimable;
+	atomic_t kmem;
 #endif
 };
 
@@ -190,6 +190,7 @@ struct obj_cgroup {
 		struct rcu_head rcu;
 	};
 	bool is_root;
+	int nid;
 };
 
 /*
@@ -254,10 +255,6 @@ struct mem_cgroup {
 	atomic_long_t memory_events[MEMCG_NR_MEMORY_EVENTS];
 	atomic_long_t memory_events_local[MEMCG_NR_MEMORY_EVENTS];
 
-#ifdef CONFIG_MEMCG_NMI_SAFETY_REQUIRES_ATOMIC
-	/* MEMCG_KMEM for nmi context */
-	atomic_t kmem_stat;
-#endif
 	/*
 	 * Hint of reclaim pressure for socket memroy management. Note
 	 * that this indicator should NOT be used in legacy cgroup mode
@@ -776,6 +773,20 @@ static inline void obj_cgroup_put(struct obj_cgroup *objcg)
 	percpu_ref_put(&objcg->refcnt);
 }
 
+static inline struct obj_cgroup *obj_cgroup_get_nid(struct obj_cgroup *objcg,
+						    int nid)
+{
+	struct obj_cgroup *nid_objcg;
+	struct mem_cgroup *memcg;
+
+	rcu_read_lock();
+	memcg = obj_cgroup_memcg(objcg);
+	nid_objcg = rcu_dereference(memcg->nodeinfo[nid]->objcg);
+	rcu_read_unlock();
+
+	return nid_objcg;
+}
+
 static inline bool mem_cgroup_tryget(struct mem_cgroup *memcg)
 {
 	return !memcg || css_tryget(&memcg->css);
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 9adb2ad21da5..97eb168fd7f3 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -326,6 +326,7 @@ enum node_stat_item {
 #ifdef CONFIG_HUGETLB_PAGE
 	NR_HUGETLB,
 #endif
+	NR_KMEM,
 	NR_BALLOON_PAGES,
 	NR_KERNEL_FILE_PAGES,
 	NR_GPU_ACTIVE,		/* Pages assigned to GPU objects */
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index aaaa6a8b9f15..979a847e542a 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -136,6 +136,7 @@ bool mem_cgroup_kmem_disabled(void)
 }
 
 static void memcg_uncharge(struct mem_cgroup *memcg, unsigned int nr_pages);
+static void mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx, int val);
 
 static void obj_cgroup_release(struct percpu_ref *ref)
 {
@@ -170,9 +171,11 @@ static void obj_cgroup_release(struct percpu_ref *ref)
 
 	if (nr_pages) {
 		struct mem_cgroup *memcg;
+		struct lruvec *lruvec;
 
 		memcg = get_mem_cgroup_from_objcg(objcg);
-		mod_memcg_state(memcg, MEMCG_KMEM, -nr_pages);
+		lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(objcg->nid));
+		mod_lruvec_state(lruvec, NR_KMEM, -nr_pages);
 		memcg1_account_kmem(memcg, -nr_pages);
 		if (!mem_cgroup_is_root(memcg))
 			memcg_uncharge(memcg, nr_pages);
@@ -423,13 +426,13 @@ static const unsigned int memcg_node_stat_items[] = {
 #ifdef CONFIG_HUGETLB_PAGE
 	NR_HUGETLB,
 #endif
+	NR_KMEM,
 };
 
 static const unsigned int memcg_stat_items[] = {
 	MEMCG_SWAP,
 	MEMCG_SOCK,
 	MEMCG_PERCPU_B,
-	MEMCG_KMEM,
 	MEMCG_ZSWAP_B,
 	MEMCG_ZSWAPPED,
 	MEMCG_ZSWAP_INCOMP,
@@ -1537,7 +1540,7 @@ struct memory_stat {
 static const struct memory_stat memory_stats[] = {
 	{ "anon", NR_ANON_MAPPED },
 	{ "file", NR_FILE_PAGES },
-	{ "kernel", MEMCG_KMEM },
+	{ "kernel", NR_KMEM },
 	{ "kernel_stack", NR_KERNEL_STACK_KB },
 	{ "pagetables", NR_PAGETABLE },
 	{ "sec_pagetables", NR_SECONDARY_PAGETABLE },
@@ -3004,20 +3007,26 @@ struct obj_cgroup *get_obj_cgroup_from_folio(struct folio *folio)
 }
 
 #ifdef CONFIG_MEMCG_NMI_SAFETY_REQUIRES_ATOMIC
-static inline void account_kmem_nmi_safe(struct mem_cgroup *memcg, int val)
+static inline void account_kmem_nmi_safe(struct mem_cgroup *memcg, int nid, int val)
 {
 	if (likely(!in_nmi())) {
-		mod_memcg_state(memcg, MEMCG_KMEM, val);
+		struct lruvec *lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(nid));
+
+		mod_lruvec_state(lruvec, NR_KMEM, val);
 	} else {
+		struct mem_cgroup_per_node *pn = memcg->nodeinfo[nid];
+
 		/* preemption is disabled in_nmi(). */
 		css_rstat_updated(&memcg->css, smp_processor_id());
-		atomic_add(val, &memcg->kmem_stat);
+		atomic_add(val, &pn->kmem);
 	}
 }
 #else
-static inline void account_kmem_nmi_safe(struct mem_cgroup *memcg, int val)
+static inline void account_kmem_nmi_safe(struct mem_cgroup *memcg, int nid, int val)
 {
-	mod_memcg_state(memcg, MEMCG_KMEM, val);
+	struct lruvec *lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(nid));
+
+	mod_lruvec_state(lruvec, NR_KMEM, val);
 }
 #endif
 
@@ -3033,7 +3042,7 @@ static void obj_cgroup_uncharge_pages(struct obj_cgroup *objcg,
 
 	memcg = get_mem_cgroup_from_objcg(objcg);
 
-	account_kmem_nmi_safe(memcg, -nr_pages);
+	account_kmem_nmi_safe(memcg, objcg->nid, -nr_pages);
 	memcg1_account_kmem(memcg, -nr_pages);
 	if (!mem_cgroup_is_root(memcg))
 		refill_stock(memcg, nr_pages);
@@ -3061,7 +3070,7 @@ static int obj_cgroup_charge_pages(struct obj_cgroup *objcg, gfp_t gfp,
 	if (ret)
 		goto out;
 
-	account_kmem_nmi_safe(memcg, nr_pages);
+	account_kmem_nmi_safe(memcg, objcg->nid, nr_pages);
 	memcg1_account_kmem(memcg, nr_pages);
 out:
 	css_put(&memcg->css);
@@ -3238,10 +3247,11 @@ static void drain_obj_stock(struct obj_stock_pcp *stock)
 
 		if (nr_pages) {
 			struct mem_cgroup *memcg;
+			struct lruvec *lruvec;
 
 			memcg = get_mem_cgroup_from_objcg(old);
-
-			mod_memcg_state(memcg, MEMCG_KMEM, -nr_pages);
+			lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(old->nid));
+			mod_lruvec_state(lruvec, NR_KMEM, -nr_pages);
 			memcg1_account_kmem(memcg, -nr_pages);
 			if (!mem_cgroup_is_root(memcg))
 				memcg_uncharge(memcg, nr_pages);
@@ -3250,7 +3260,7 @@ static void drain_obj_stock(struct obj_stock_pcp *stock)
 		}
 
 		/*
-		 * The leftover is flushed to the centralized per-memcg value.
+		 * The leftover is flushed to the per-node per-memcg value.
 		 * On the next attempt to refill obj stock it will be moved
 		 * to a per-cpu stock (probably, on an other CPU), see
 		 * refill_obj_stock().
@@ -3417,7 +3427,7 @@ void obj_cgroup_account_kmem(struct obj_cgroup *objcg, unsigned int nr_pages)
 
 	rcu_read_lock();
 	memcg = obj_cgroup_memcg(objcg);
-	account_kmem_nmi_safe(memcg, nr_pages);
+	account_kmem_nmi_safe(memcg, objcg->nid, nr_pages);
 	memcg1_account_kmem(memcg, nr_pages);
 	rcu_read_unlock();
 }
@@ -4165,6 +4175,7 @@ static int mem_cgroup_css_online(struct cgroup_subsys_state *css)
 		if (unlikely(mem_cgroup_is_root(memcg)))
 			objcg->is_root = true;
 
+		objcg->nid = nid;
 		objcg->memcg = memcg;
 		rcu_assign_pointer(memcg->nodeinfo[nid]->objcg, objcg);
 		obj_cgroup_get(objcg);
@@ -4369,15 +4380,6 @@ static void flush_nmi_stats(struct mem_cgroup *memcg, struct mem_cgroup *parent,
 {
 	int nid;
 
-	if (atomic_read(&memcg->kmem_stat)) {
-		int kmem = atomic_xchg(&memcg->kmem_stat, 0);
-		int index = memcg_stats_index(MEMCG_KMEM);
-
-		memcg->vmstats->state[index] += kmem;
-		if (parent)
-			parent->vmstats->state_pending[index] += kmem;
-	}
-
 	for_each_node_state(nid, N_MEMORY) {
 		struct mem_cgroup_per_node *pn = memcg->nodeinfo[nid];
 		struct lruvec_stats *lstats = pn->lruvec_stats;
@@ -4408,6 +4410,18 @@ static void flush_nmi_stats(struct mem_cgroup *memcg, struct mem_cgroup *parent,
 			if (parent)
 				parent->vmstats->state_pending[index] += slab;
 		}
+		if (atomic_read(&pn->kmem)) {
+			int kmem = atomic_xchg(&pn->kmem, 0);
+			int index = memcg_stats_index(NR_KMEM);
+
+			mod_node_page_state(NODE_DATA(nid), NR_KMEM, kmem);
+			lstats->state[index] += kmem;
+			memcg->vmstats->state[index] += kmem;
+			if (plstats)
+				plstats->state_pending[index] += kmem;
+			if (parent)
+				parent->vmstats->state_pending[index] += kmem;
+		}
 	}
 }
 #else
@@ -5173,7 +5187,9 @@ static void uncharge_batch(const struct uncharge_gather *ug)
 	if (ug->nr_memory) {
 		memcg_uncharge(memcg, ug->nr_memory);
 		if (ug->nr_kmem) {
-			mod_memcg_state(memcg, MEMCG_KMEM, -ug->nr_kmem);
+			struct lruvec *lruvec =
+				mem_cgroup_lruvec(memcg, NODE_DATA(ug->objcg->nid));
+			mod_lruvec_state(lruvec, NR_KMEM, -ug->nr_kmem);
 			memcg1_account_kmem(memcg, -ug->nr_kmem);
 		}
 		memcg1_oom_recover(memcg);
diff --git a/mm/vmstat.c b/mm/vmstat.c
index f534972f517d..d55437d1852e 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1293,6 +1293,7 @@ const char * const vmstat_text[] = {
 #ifdef CONFIG_HUGETLB_PAGE
 	[I(NR_HUGETLB)] = "nr_hugetlb",
 #endif
+	[I(NR_KMEM)] = "nr_kmem",
 	[I(NR_BALLOON_PAGES)] = "nr_balloon_pages",
 	[I(NR_KERNEL_FILE_PAGES)] = "nr_kernel_file_pages",
 	[I(NR_GPU_ACTIVE)] = "nr_gpu_active",
-- 
2.54.0
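
Illustrative sketch, not part of the patch above: once NR_KMEM is a regular
node_stat_item tracked per lruvec, a memcg's kernel-memory footprint on a
given NUMA node can be read back with the existing lruvec stat helpers. The
helper name memcg_kmem_pages_on_node() below is made up for illustration;
it only relies on mem_cgroup_lruvec() and lruvec_page_state(), which already
exist.

static unsigned long memcg_kmem_pages_on_node(struct mem_cgroup *memcg, int nid)
{
	/* Per-node view of the memcg's stats. */
	struct lruvec *lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(nid));

	/* Kernel memory charged to this memcg on node 'nid', in pages. */
	return lruvec_page_state(lruvec, NR_KMEM);
}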